1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
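/* A small illustration (assuming the mult_init field of struct
   processor_costs as declared in i386.h): the per-mode cost arrays
   below are subscripted with MODE_INDEX, e.g.

     int simode_mult_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   picks element 2, and any mode wider than DImode falls back to the
   last slot (index 4).  */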
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
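/* A sketch of how these masks are consumed (assuming the TUNEMASK
   macro from i386.h, the same one tested against
   x86_accumulate_outgoing_args in override_options below):

     #define TUNEMASK (1 << ix86_tune)

     if (x86_use_leave & TUNEMASK)
       ... prefer the "leave" instruction in the epilogue ...

   i386.h normally wraps such tests in TARGET_* convenience macros.  */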
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just the lower part of
563 scalar values in the proper format, leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes the partial-register FP special case, thus avoiding the
566 need for extra instructions beforehand. */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 a 16-byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
578 /* If the average insn count for a single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
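/* With the usual x86-64 values (REGPARM_MAX == 6, UNITS_PER_WORD == 8,
   SSE_REGPARM_MAX == 8) this works out to

     6 * 8 + 8 * 16 = 176 bytes

   of register save area: six general-purpose argument registers plus
   eight 16-byte SSE argument registers for va_arg.  */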
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which CPU we are scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* True if the SSE prefetch instruction is not a NOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
839 int, int, FILE *);
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
845 rtx *);
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
848 enum machine_mode);
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
883 struct ix86_address
885 rtx base, index, disp;
886 HOST_WIDE_INT scale;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
890 static int ix86_decompose_address (rtx, struct ix86_address *);
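/* A hypothetical decomposition, to show what the fields mean: for the
   address

     (plus (mult (reg B) (const_int 4)) (plus (reg A) (const_int 12)))

   ix86_decompose_address would fill in roughly base = (reg A),
   index = (reg B), scale = 4, disp = (const_int 12), seg = SEG_DEFAULT,
   i.e. the familiar base + index*scale + displacement addressing form,
   with any segment override kept separately.  */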
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 tree, rtx);
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 tree, rtx);
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
931 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
932 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
933 tree, bool);
935 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
936 static void ix86_svr3_asm_out_constructor (rtx, int);
937 #endif
939 /* Register class used for passing a given 64bit part of the argument.
940 These represent classes as documented by the psABI, with the exception
941 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
942 uses an SFmode or DFmode move instead of a DImode move to avoid reformatting penalties.
944 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
945 whenever possible (the upper half does contain padding).
947 enum x86_64_reg_class
949 X86_64_NO_CLASS,
950 X86_64_INTEGER_CLASS,
951 X86_64_INTEGERSI_CLASS,
952 X86_64_SSE_CLASS,
953 X86_64_SSESF_CLASS,
954 X86_64_SSEDF_CLASS,
955 X86_64_SSEUP_CLASS,
956 X86_64_X87_CLASS,
957 X86_64_X87UP_CLASS,
958 X86_64_MEMORY_CLASS
960 static const char * const x86_64_reg_class_name[] =
961 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
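/* A rough example of the classification (see classify_argument below
   for the exact rules): a 16-byte

     struct { double d; long l; };

   passed by value would classify its first eightbyte as
   X86_64_SSEDF_CLASS and its second as X86_64_INTEGER_CLASS, so such an
   argument travels in one SSE register and one integer register.  */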
963 #define MAX_CLASSES 4
964 static int classify_argument (enum machine_mode, tree,
965 enum x86_64_reg_class [MAX_CLASSES], int);
966 static int examine_argument (enum machine_mode, tree, int, int *, int *);
967 static rtx construct_container (enum machine_mode, tree, int, int, int,
968 const int *, int);
969 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
970 enum x86_64_reg_class);
972 /* Table of constants used by fldpi, fldln2, etc.... */
973 static REAL_VALUE_TYPE ext_80387_constants_table [5];
974 static bool ext_80387_constants_init = 0;
975 static void init_ext_80387_constants (void);
977 /* Initialize the GCC target structure. */
978 #undef TARGET_ATTRIBUTE_TABLE
979 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
980 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
981 # undef TARGET_MERGE_DECL_ATTRIBUTES
982 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
983 #endif
985 #undef TARGET_COMP_TYPE_ATTRIBUTES
986 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
988 #undef TARGET_INIT_BUILTINS
989 #define TARGET_INIT_BUILTINS ix86_init_builtins
991 #undef TARGET_EXPAND_BUILTIN
992 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
994 #undef TARGET_ASM_FUNCTION_EPILOGUE
995 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
997 #undef TARGET_ASM_OPEN_PAREN
998 #define TARGET_ASM_OPEN_PAREN ""
999 #undef TARGET_ASM_CLOSE_PAREN
1000 #define TARGET_ASM_CLOSE_PAREN ""
1002 #undef TARGET_ASM_ALIGNED_HI_OP
1003 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1004 #undef TARGET_ASM_ALIGNED_SI_OP
1005 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1006 #ifdef ASM_QUAD
1007 #undef TARGET_ASM_ALIGNED_DI_OP
1008 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1009 #endif
1011 #undef TARGET_ASM_UNALIGNED_HI_OP
1012 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1013 #undef TARGET_ASM_UNALIGNED_SI_OP
1014 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1015 #undef TARGET_ASM_UNALIGNED_DI_OP
1016 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1018 #undef TARGET_SCHED_ADJUST_COST
1019 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1020 #undef TARGET_SCHED_ISSUE_RATE
1021 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1022 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1023 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1024 ia32_multipass_dfa_lookahead
1026 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1027 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1029 #ifdef HAVE_AS_TLS
1030 #undef TARGET_HAVE_TLS
1031 #define TARGET_HAVE_TLS true
1032 #endif
1033 #undef TARGET_CANNOT_FORCE_CONST_MEM
1034 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1036 #undef TARGET_DELEGITIMIZE_ADDRESS
1037 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1039 #undef TARGET_MS_BITFIELD_LAYOUT_P
1040 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1042 #undef TARGET_ASM_OUTPUT_MI_THUNK
1043 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1044 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1045 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1047 #undef TARGET_ASM_FILE_START
1048 #define TARGET_ASM_FILE_START x86_file_start
1050 #undef TARGET_RTX_COSTS
1051 #define TARGET_RTX_COSTS ix86_rtx_costs
1052 #undef TARGET_ADDRESS_COST
1053 #define TARGET_ADDRESS_COST ix86_address_cost
1055 #undef TARGET_FIXED_CONDITION_CODE_REGS
1056 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1057 #undef TARGET_CC_MODES_COMPATIBLE
1058 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1060 #undef TARGET_MACHINE_DEPENDENT_REORG
1061 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1063 #undef TARGET_BUILD_BUILTIN_VA_LIST
1064 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1066 #undef TARGET_MD_ASM_CLOBBERS
1067 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1069 #undef TARGET_PROMOTE_PROTOTYPES
1070 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1071 #undef TARGET_STRUCT_VALUE_RTX
1072 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1073 #undef TARGET_SETUP_INCOMING_VARARGS
1074 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_MUST_PASS_IN_STACK
1076 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1077 #undef TARGET_PASS_BY_REFERENCE
1078 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1080 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1081 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1083 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1084 #undef TARGET_INSERT_ATTRIBUTES
1085 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1086 #endif
1088 struct gcc_target targetm = TARGET_INITIALIZER;
1091 /* The svr4 ABI for the i386 says that records and unions are returned
1092 in memory. */
1093 #ifndef DEFAULT_PCC_STRUCT_RETURN
1094 #define DEFAULT_PCC_STRUCT_RETURN 1
1095 #endif
1097 /* Sometimes certain combinations of command options do not make
1098 sense on a particular target machine. You can define a macro
1099 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1100 defined, is executed once just after all the command options have
1101 been parsed.
1103 Don't use this macro to turn on various extra optimizations for
1104 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1106 void
1107 override_options (void)
1109 int i;
1110 int ix86_tune_defaulted = 0;
1112 /* Comes from final.c -- no real reason to change it. */
1113 #define MAX_CODE_ALIGN 16
1115 static struct ptt
1117 const struct processor_costs *cost; /* Processor costs */
1118 const int target_enable; /* Target flags to enable. */
1119 const int target_disable; /* Target flags to disable. */
1120 const int align_loop; /* Default alignments. */
1121 const int align_loop_max_skip;
1122 const int align_jump;
1123 const int align_jump_max_skip;
1124 const int align_func;
1126 const processor_target_table[PROCESSOR_max] =
1128 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1129 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1130 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1131 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1132 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1133 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1134 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1135 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1136 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1139 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1140 static struct pta
1142 const char *const name; /* processor name or nickname. */
1143 const enum processor_type processor;
1144 const enum pta_flags
1146 PTA_SSE = 1,
1147 PTA_SSE2 = 2,
1148 PTA_SSE3 = 4,
1149 PTA_MMX = 8,
1150 PTA_PREFETCH_SSE = 16,
1151 PTA_3DNOW = 32,
1152 PTA_3DNOW_A = 64,
1153 PTA_64BIT = 128
1154 } flags;
1156 const processor_alias_table[] =
1158 {"i386", PROCESSOR_I386, 0},
1159 {"i486", PROCESSOR_I486, 0},
1160 {"i586", PROCESSOR_PENTIUM, 0},
1161 {"pentium", PROCESSOR_PENTIUM, 0},
1162 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1163 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1164 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1165 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1166 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1167 {"i686", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1169 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1170 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1171 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1172 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1173 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1176 | PTA_MMX | PTA_PREFETCH_SSE},
1177 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1178 | PTA_MMX | PTA_PREFETCH_SSE},
1179 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1180 | PTA_MMX | PTA_PREFETCH_SSE},
1181 {"k6", PROCESSOR_K6, PTA_MMX},
1182 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1183 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1184 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1185 | PTA_3DNOW_A},
1186 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1187 | PTA_3DNOW | PTA_3DNOW_A},
1188 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1189 | PTA_3DNOW_A | PTA_SSE},
1190 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1191 | PTA_3DNOW_A | PTA_SSE},
1192 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1193 | PTA_3DNOW_A | PTA_SSE},
1194 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1195 | PTA_SSE | PTA_SSE2 },
1196 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1200 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1201 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1202 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1203 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1206 int const pta_size = ARRAY_SIZE (processor_alias_table);
1208 /* Set the default values for switches whose default depends on TARGET_64BIT
1209 in case they weren't overwritten by command line options. */
1210 if (TARGET_64BIT)
1212 if (flag_omit_frame_pointer == 2)
1213 flag_omit_frame_pointer = 1;
1214 if (flag_asynchronous_unwind_tables == 2)
1215 flag_asynchronous_unwind_tables = 1;
1216 if (flag_pcc_struct_return == 2)
1217 flag_pcc_struct_return = 0;
1219 else
1221 if (flag_omit_frame_pointer == 2)
1222 flag_omit_frame_pointer = 0;
1223 if (flag_asynchronous_unwind_tables == 2)
1224 flag_asynchronous_unwind_tables = 0;
1225 if (flag_pcc_struct_return == 2)
1226 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1229 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1230 SUBTARGET_OVERRIDE_OPTIONS;
1231 #endif
1233 if (!ix86_tune_string && ix86_arch_string)
1234 ix86_tune_string = ix86_arch_string;
1235 if (!ix86_tune_string)
1237 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1238 ix86_tune_defaulted = 1;
1240 if (!ix86_arch_string)
1241 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1243 if (ix86_cmodel_string != 0)
1245 if (!strcmp (ix86_cmodel_string, "small"))
1246 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1247 else if (flag_pic)
1248 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1249 else if (!strcmp (ix86_cmodel_string, "32"))
1250 ix86_cmodel = CM_32;
1251 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1252 ix86_cmodel = CM_KERNEL;
1253 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1254 ix86_cmodel = CM_MEDIUM;
1255 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1256 ix86_cmodel = CM_LARGE;
1257 else
1258 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1260 else
1262 ix86_cmodel = CM_32;
1263 if (TARGET_64BIT)
1264 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1266 if (ix86_asm_string != 0)
1268 if (!strcmp (ix86_asm_string, "intel"))
1269 ix86_asm_dialect = ASM_INTEL;
1270 else if (!strcmp (ix86_asm_string, "att"))
1271 ix86_asm_dialect = ASM_ATT;
1272 else
1273 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1275 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1276 error ("code model `%s' not supported in the %s bit mode",
1277 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1278 if (ix86_cmodel == CM_LARGE)
1279 sorry ("code model `large' not supported yet");
1280 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1281 sorry ("%i-bit mode not compiled in",
1282 (target_flags & MASK_64BIT) ? 64 : 32);
1284 for (i = 0; i < pta_size; i++)
1285 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1287 ix86_arch = processor_alias_table[i].processor;
1288 /* Default cpu tuning to the architecture. */
1289 ix86_tune = ix86_arch;
1290 if (processor_alias_table[i].flags & PTA_MMX
1291 && !(target_flags_explicit & MASK_MMX))
1292 target_flags |= MASK_MMX;
1293 if (processor_alias_table[i].flags & PTA_3DNOW
1294 && !(target_flags_explicit & MASK_3DNOW))
1295 target_flags |= MASK_3DNOW;
1296 if (processor_alias_table[i].flags & PTA_3DNOW_A
1297 && !(target_flags_explicit & MASK_3DNOW_A))
1298 target_flags |= MASK_3DNOW_A;
1299 if (processor_alias_table[i].flags & PTA_SSE
1300 && !(target_flags_explicit & MASK_SSE))
1301 target_flags |= MASK_SSE;
1302 if (processor_alias_table[i].flags & PTA_SSE2
1303 && !(target_flags_explicit & MASK_SSE2))
1304 target_flags |= MASK_SSE2;
1305 if (processor_alias_table[i].flags & PTA_SSE3
1306 && !(target_flags_explicit & MASK_SSE3))
1307 target_flags |= MASK_SSE3;
1308 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1309 x86_prefetch_sse = true;
1310 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1312 if (ix86_tune_defaulted)
1314 ix86_tune_string = "x86-64";
1315 for (i = 0; i < pta_size; i++)
1316 if (! strcmp (ix86_tune_string,
1317 processor_alias_table[i].name))
1318 break;
1319 ix86_tune = processor_alias_table[i].processor;
1321 else
1322 error ("CPU you selected does not support x86-64 "
1323 "instruction set");
1325 break;
1328 if (i == pta_size)
1329 error ("bad value (%s) for -march= switch", ix86_arch_string);
1331 for (i = 0; i < pta_size; i++)
1332 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1334 ix86_tune = processor_alias_table[i].processor;
1335 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1336 error ("CPU you selected does not support x86-64 instruction set");
1338 /* Intel CPUs have always interpreted SSE prefetch instructions as
1339 NOPs; so, we can enable SSE prefetch instructions even when
1340 -mtune (rather than -march) points us to a processor that has them.
1341 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1342 higher processors. */
1343 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1344 x86_prefetch_sse = true;
1345 break;
1347 if (i == pta_size)
1348 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1350 if (optimize_size)
1351 ix86_cost = &size_cost;
1352 else
1353 ix86_cost = processor_target_table[ix86_tune].cost;
1354 target_flags |= processor_target_table[ix86_tune].target_enable;
1355 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1357 /* Arrange to set up i386_stack_locals for all functions. */
1358 init_machine_status = ix86_init_machine_status;
1360 /* Validate -mregparm= value. */
1361 if (ix86_regparm_string)
1363 i = atoi (ix86_regparm_string);
1364 if (i < 0 || i > REGPARM_MAX)
1365 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1366 else
1367 ix86_regparm = i;
1369 else
1370 if (TARGET_64BIT)
1371 ix86_regparm = REGPARM_MAX;
1373 /* If the user has provided any of the -malign-* options,
1374 warn and use that value only if -falign-* is not set.
1375 Remove this code in GCC 3.2 or later. */
1376 if (ix86_align_loops_string)
1378 warning ("-malign-loops is obsolete, use -falign-loops");
1379 if (align_loops == 0)
1381 i = atoi (ix86_align_loops_string);
1382 if (i < 0 || i > MAX_CODE_ALIGN)
1383 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1384 else
1385 align_loops = 1 << i;
1389 if (ix86_align_jumps_string)
1391 warning ("-malign-jumps is obsolete, use -falign-jumps");
1392 if (align_jumps == 0)
1394 i = atoi (ix86_align_jumps_string);
1395 if (i < 0 || i > MAX_CODE_ALIGN)
1396 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1397 else
1398 align_jumps = 1 << i;
1402 if (ix86_align_funcs_string)
1404 warning ("-malign-functions is obsolete, use -falign-functions");
1405 if (align_functions == 0)
1407 i = atoi (ix86_align_funcs_string);
1408 if (i < 0 || i > MAX_CODE_ALIGN)
1409 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1410 else
1411 align_functions = 1 << i;
1415 /* Default align_* from the processor table. */
1416 if (align_loops == 0)
1418 align_loops = processor_target_table[ix86_tune].align_loop;
1419 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1421 if (align_jumps == 0)
1423 align_jumps = processor_target_table[ix86_tune].align_jump;
1424 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1426 if (align_functions == 0)
1428 align_functions = processor_target_table[ix86_tune].align_func;
1431 /* Validate -mpreferred-stack-boundary= value, or provide default.
1432 The default of 128 bits is for Pentium III's SSE __m128, but we
1433 don't want additional code to keep the stack aligned when
1434 optimizing for code size. */
1435 ix86_preferred_stack_boundary = (optimize_size
1436 ? TARGET_64BIT ? 128 : 32
1437 : 128);
1438 if (ix86_preferred_stack_boundary_string)
1440 i = atoi (ix86_preferred_stack_boundary_string);
1441 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1442 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1443 TARGET_64BIT ? 4 : 2);
1444 else
1445 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
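/* For example, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   alignment wanted for SSE __m128 spills; the 64-bit minimum of 4
   likewise corresponds to 16-byte stack alignment.  */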
1448 /* Validate -mbranch-cost= value, or provide default. */
1449 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1450 if (ix86_branch_cost_string)
1452 i = atoi (ix86_branch_cost_string);
1453 if (i < 0 || i > 5)
1454 error ("-mbranch-cost=%d is not between 0 and 5", i);
1455 else
1456 ix86_branch_cost = i;
1459 if (ix86_tls_dialect_string)
1461 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1462 ix86_tls_dialect = TLS_DIALECT_GNU;
1463 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1464 ix86_tls_dialect = TLS_DIALECT_SUN;
1465 else
1466 error ("bad value (%s) for -mtls-dialect= switch",
1467 ix86_tls_dialect_string);
1470 /* Keep nonleaf frame pointers. */
1471 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1472 flag_omit_frame_pointer = 1;
1474 /* If we're doing fast math, we don't care about comparison order
1475 wrt NaNs. This lets us use a shorter comparison sequence. */
1476 if (flag_unsafe_math_optimizations)
1477 target_flags &= ~MASK_IEEE_FP;
1479 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1480 since the insns won't need emulation. */
1481 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1482 target_flags &= ~MASK_NO_FANCY_MATH_387;
1484 /* Turn on SSE2 builtins for -msse3. */
1485 if (TARGET_SSE3)
1486 target_flags |= MASK_SSE2;
1488 /* Turn on SSE builtins for -msse2. */
1489 if (TARGET_SSE2)
1490 target_flags |= MASK_SSE;
1492 if (TARGET_64BIT)
1494 if (TARGET_ALIGN_DOUBLE)
1495 error ("-malign-double makes no sense in 64-bit mode");
1496 if (TARGET_RTD)
1497 error ("-mrtd calling convention not supported in 64-bit mode");
1498 /* Enable by default the SSE and MMX builtins. */
1499 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1500 ix86_fpmath = FPMATH_SSE;
1502 else
1504 ix86_fpmath = FPMATH_387;
1505 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
1506 when the programmer takes care to keep the stack from being destroyed. */
1507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1508 target_flags |= MASK_NO_RED_ZONE;
1511 if (ix86_fpmath_string != 0)
1513 if (! strcmp (ix86_fpmath_string, "387"))
1514 ix86_fpmath = FPMATH_387;
1515 else if (! strcmp (ix86_fpmath_string, "sse"))
1517 if (!TARGET_SSE)
1519 warning ("SSE instruction set disabled, using 387 arithmetic");
1520 ix86_fpmath = FPMATH_387;
1522 else
1523 ix86_fpmath = FPMATH_SSE;
1525 else if (! strcmp (ix86_fpmath_string, "387,sse")
1526 || ! strcmp (ix86_fpmath_string, "sse,387"))
1528 if (!TARGET_SSE)
1530 warning ("SSE instruction set disabled, using 387 arithmetic");
1531 ix86_fpmath = FPMATH_387;
1533 else if (!TARGET_80387)
1535 warning ("387 instruction set disabled, using SSE arithmetic");
1536 ix86_fpmath = FPMATH_SSE;
1538 else
1539 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1541 else
1542 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1545 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1546 on by -msse. */
1547 if (TARGET_SSE)
1549 target_flags |= MASK_MMX;
1550 x86_prefetch_sse = true;
1553 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1554 if (TARGET_3DNOW)
1556 target_flags |= MASK_MMX;
1557 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1558 extensions it adds. */
1559 if (x86_3dnow_a & (1 << ix86_arch))
1560 target_flags |= MASK_3DNOW_A;
1562 if ((x86_accumulate_outgoing_args & TUNEMASK)
1563 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1564 && !optimize_size)
1565 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1567 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1569 char *p;
1570 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1571 p = strchr (internal_label_prefix, 'X');
1572 internal_label_prefix_len = p - internal_label_prefix;
1573 *p = '\0';
1577 void
1578 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1580 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1581 make the problem with not enough registers even worse. */
1582 #ifdef INSN_SCHEDULING
1583 if (level > 1)
1584 flag_schedule_insns = 0;
1585 #endif
1587 /* The default values of these switches depend on TARGET_64BIT,
1588 which is not known at this point.  Mark these values with 2 and
1589 let the user override them.  If there is no command line option
1590 specifying them, we will set the defaults in override_options. */
1591 if (optimize >= 1)
1592 flag_omit_frame_pointer = 2;
1593 flag_pcc_struct_return = 2;
1594 flag_asynchronous_unwind_tables = 2;
1597 /* Table of valid machine attributes. */
1598 const struct attribute_spec ix86_attribute_table[] =
1600 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1601 /* Stdcall attribute says callee is responsible for popping arguments
1602 if they are not variable. */
1603 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1604 /* Fastcall attribute says callee is responsible for popping arguments
1605 if they are not variable. */
1606 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1607 /* Cdecl attribute says the callee is a normal C declaration */
1608 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1609 /* Regparm attribute specifies how many integer arguments are to be
1610 passed in registers. */
1611 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1612 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1613 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1614 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1615 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1616 #endif
1617 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1618 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1619 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1620 SUBTARGET_ATTRIBUTE_TABLE,
1621 #endif
1622 { NULL, 0, 0, false, false, false, NULL }
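/* A usage sketch (user source, not part of this file) of the attributes
   registered above:

       int __attribute__ ((stdcall))  f (int a, int b);             callee pops its args
       int __attribute__ ((fastcall)) g (int a, int b);             a in %ecx, b in %edx
       int __attribute__ ((regparm (3))) h (int a, int b, int c);   args in %eax, %edx, %ecx
       struct __attribute__ ((ms_struct)) s { char c; int i; };     MS-compatible layout

   The handler functions named in each table entry validate these uses and
   emit the diagnostics implemented below.  */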
1625 /* Decide whether we can make a sibling call to a function. DECL is the
1626 declaration of the function being targeted by the call and EXP is the
1627 CALL_EXPR representing the call. */
1629 static bool
1630 ix86_function_ok_for_sibcall (tree decl, tree exp)
1632 /* If we are generating position-independent code, we cannot sibcall
1633 optimize any indirect call, or a direct call to a global function,
1634 as the PLT requires %ebx be live. */
1635 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1636 return false;
1638 /* If we are returning floats on the 80387 register stack, we cannot
1639 make a sibcall from a function that doesn't return a float to a
1640 function that does or, conversely, from a function that does return
1641 a float to a function that doesn't; the necessary stack adjustment
1642 would not be executed. */
1643 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1644 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1645 return false;
1647 /* If this call is indirect, we'll need to be able to use a call-clobbered
1648 register for the address of the target function. Make sure that all
1649 such registers are not used for passing parameters. */
1650 if (!decl && !TARGET_64BIT)
1652 tree type;
1654 /* We're looking at the CALL_EXPR, we need the type of the function. */
1655 type = TREE_OPERAND (exp, 0); /* pointer expression */
1656 type = TREE_TYPE (type); /* pointer type */
1657 type = TREE_TYPE (type); /* function type */
1659 if (ix86_function_regparm (type, NULL) >= 3)
1661 /* ??? Need to count the actual number of registers to be used,
1662 not the possible number of registers. Fix later. */
1663 return false;
1667 /* Otherwise okay. That also includes certain types of indirect calls. */
1668 return true;
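/* A sketch of the PIC restriction above: compiling 32-bit code with -fpic,

       extern int bar (int);
       int foo (int x) { return bar (x); }

   the tail call to the global function bar is not turned into a sibcall,
   since it would go through the PLT and the PLT entry needs %ebx to hold
   the GOT pointer; a call to a local (static) function may still qualify.  */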
1671 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1672 arguments as in struct attribute_spec.handler. */
1673 static tree
1674 ix86_handle_cdecl_attribute (tree *node, tree name,
1675 tree args ATTRIBUTE_UNUSED,
1676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1678 if (TREE_CODE (*node) != FUNCTION_TYPE
1679 && TREE_CODE (*node) != METHOD_TYPE
1680 && TREE_CODE (*node) != FIELD_DECL
1681 && TREE_CODE (*node) != TYPE_DECL)
1683 warning ("`%s' attribute only applies to functions",
1684 IDENTIFIER_POINTER (name));
1685 *no_add_attrs = true;
1687 else
1689 if (is_attribute_p ("fastcall", name))
1691 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1693 error ("fastcall and stdcall attributes are not compatible");
1695 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1697 error ("fastcall and regparm attributes are not compatible");
1700 else if (is_attribute_p ("stdcall", name))
1702 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1704 error ("fastcall and stdcall attributes are not compatible");
1709 if (TARGET_64BIT)
1711 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1712 *no_add_attrs = true;
1715 return NULL_TREE;
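/* For example (user source), the checks above give:

       void __attribute__ ((fastcall)) f (int);            accepted
       void __attribute__ ((fastcall, stdcall)) g (int);   error: fastcall and stdcall
                                                           attributes are not compatible

   and on a 64-bit target any of these attributes is ignored with a warning,
   since the calling conventions they select only exist in 32-bit mode.  */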
1718 /* Handle a "regparm" attribute;
1719 arguments as in struct attribute_spec.handler. */
1720 static tree
1721 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1722 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1724 if (TREE_CODE (*node) != FUNCTION_TYPE
1725 && TREE_CODE (*node) != METHOD_TYPE
1726 && TREE_CODE (*node) != FIELD_DECL
1727 && TREE_CODE (*node) != TYPE_DECL)
1729 warning ("`%s' attribute only applies to functions",
1730 IDENTIFIER_POINTER (name));
1731 *no_add_attrs = true;
1733 else
1735 tree cst;
1737 cst = TREE_VALUE (args);
1738 if (TREE_CODE (cst) != INTEGER_CST)
1740 warning ("`%s' attribute requires an integer constant argument",
1741 IDENTIFIER_POINTER (name));
1742 *no_add_attrs = true;
1744 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1746 warning ("argument to `%s' attribute larger than %d",
1747 IDENTIFIER_POINTER (name), REGPARM_MAX);
1748 *no_add_attrs = true;
1751 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1753 error ("fastcall and regparm attributes are not compatible");
1757 return NULL_TREE;
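/* For example (user source; REGPARM_MAX is 3 for IA-32):

       void __attribute__ ((regparm (3))) f (int, int, int);        accepted
       void __attribute__ ((regparm (4))) g (int);                  warning: larger than 3
       void __attribute__ ((fastcall, regparm (2))) h (int, int);   error: not compatible  */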
1760 /* Return 0 if the attributes for two types are incompatible, 1 if they
1761 are compatible, and 2 if they are nearly compatible (which causes a
1762 warning to be generated). */
1764 static int
1765 ix86_comp_type_attributes (tree type1, tree type2)
1767 /* Check for mismatch of non-default calling convention. */
1768 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1770 if (TREE_CODE (type1) != FUNCTION_TYPE)
1771 return 1;
1773 /* Check for mismatched fastcall types */
1774 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1775 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1776 return 0;
1778 /* Check for mismatched return types (cdecl vs stdcall). */
1779 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1780 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1781 return 0;
1782 if (ix86_function_regparm (type1, NULL)
1783 != ix86_function_regparm (type2, NULL))
1784 return 0;
1785 return 1;
1788 /* Return the regparm value for a function with the indicated TYPE and DECL.
1789 DECL may be NULL when calling function indirectly
1790 or considering a libcall. */
1792 static int
1793 ix86_function_regparm (tree type, tree decl)
1795 tree attr;
1796 int regparm = ix86_regparm;
1797 bool user_convention = false;
1799 if (!TARGET_64BIT)
1801 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1802 if (attr)
1804 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1810 regparm = 2;
1811 user_convention = true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && decl
1816 && flag_unit_at_a_time && !profile_flag)
1818 struct cgraph_local_info *i = cgraph_local_info (decl);
1819 if (i && i->local)
1821 /* We can't use regparm(3) for nested functions, as these use the
1822 static chain pointer in the third argument. */
1823 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1824 regparm = 2;
1825 else
1826 regparm = 3;
1830 return regparm;
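/* A sketch of the local-function case above: with -funit-at-a-time and no
   profiling, a function such as

       static int sum3 (int a, int b, int c) { return a + b + c; }

   that cgraph marks as local (not visible or address-taken outside the unit)
   gets regparm 3 automatically, so all three arguments arrive in registers;
   a nested function only gets regparm 2 because the static chain occupies
   the third register.  */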
1833 /* Return true if EAX is live at the start of the function. Used by
1834 ix86_expand_prologue to determine if we need special help before
1835 calling allocate_stack_worker. */
1837 static bool
1838 ix86_eax_live_at_start_p (void)
1840 /* Cheat. Don't bother working forward from ix86_function_regparm
1841 to the function type to whether an actual argument is located in
1842 eax. Instead just look at cfg info, which is still close enough
1843 to correct at this point. This gives false positives for broken
1844 functions that might use uninitialized data that happens to be
1845 allocated in eax, but who cares? */
1846 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1849 /* Value is the number of bytes of arguments automatically
1850 popped when returning from a subroutine call.
1851 FUNDECL is the declaration node of the function (as a tree),
1852 FUNTYPE is the data type of the function (as a tree),
1853 or for a library call it is an identifier node for the subroutine name.
1854 SIZE is the number of bytes of arguments passed on the stack.
1856 On the 80386, the RTD insn may be used to pop them if the number
1857 of args is fixed, but if the number is variable then the caller
1858 must pop them all. RTD can't be used for library calls now
1859 because the library is compiled with the Unix compiler.
1860 Use of RTD is a selectable option, since it is incompatible with
1861 standard Unix calling sequences. If the option is not selected,
1862 the caller must always pop the args.
1864 The attribute stdcall is equivalent to RTD on a per module basis. */
1867 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1869 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1871 /* Cdecl functions override -mrtd, and never pop the stack. */
1872 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1874 /* Stdcall and fastcall functions will pop the stack if they do not
1875 take variable args. */
1876 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1877 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1878 rtd = 1;
1880 if (rtd
1881 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1882 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1883 == void_type_node)))
1884 return size;
1887 /* Lose any fake structure return argument if it is passed on the stack. */
1888 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1889 && !TARGET_64BIT)
1891 int nregs = ix86_function_regparm (funtype, fundecl);
1893 if (!nregs)
1894 return GET_MODE_SIZE (Pmode);
1897 return 0;
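/* Concretely, for a stdcall function with a fixed argument list, e.g.

       int __attribute__ ((stdcall)) f (int a, int b, int c);

   SIZE is 12 and the callee pops those 12 bytes itself ("ret $12"), while
   the same function declared cdecl returns 0 here and the caller pops the
   arguments.  */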
1900 /* Argument support functions. */
1902 /* Return true when register may be used to pass function parameters. */
1903 bool
1904 ix86_function_arg_regno_p (int regno)
1906 int i;
1907 if (!TARGET_64BIT)
1908 return (regno < REGPARM_MAX
1909 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1910 if (SSE_REGNO_P (regno) && TARGET_SSE)
1911 return true;
1912 /* RAX is used as hidden argument to va_arg functions. */
1913 if (!regno)
1914 return true;
1915 for (i = 0; i < REGPARM_MAX; i++)
1916 if (regno == x86_64_int_parameter_registers[i])
1917 return true;
1918 return false;
1921 /* Return true if we do not know how to pass TYPE solely in registers. */
1923 static bool
1924 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1926 if (must_pass_in_stack_var_size_or_pad (mode, type))
1927 return true;
1928 return (!TARGET_64BIT && type && mode == TImode);
1931 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1932 for a call to a function whose data type is FNTYPE.
1933 For a library call, FNTYPE is 0. */
1935 void
1936 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1937 tree fntype, /* tree ptr for function decl */
1938 rtx libname, /* SYMBOL_REF of library name or 0 */
1939 tree fndecl)
1941 static CUMULATIVE_ARGS zero_cum;
1942 tree param, next_param;
1944 if (TARGET_DEBUG_ARG)
1946 fprintf (stderr, "\ninit_cumulative_args (");
1947 if (fntype)
1948 fprintf (stderr, "fntype code = %s, ret code = %s",
1949 tree_code_name[(int) TREE_CODE (fntype)],
1950 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1951 else
1952 fprintf (stderr, "no fntype");
1954 if (libname)
1955 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1958 *cum = zero_cum;
1960 /* Set up the number of registers to use for passing arguments. */
1961 if (fntype)
1962 cum->nregs = ix86_function_regparm (fntype, fndecl);
1963 else
1964 cum->nregs = ix86_regparm;
1965 if (TARGET_SSE)
1966 cum->sse_nregs = SSE_REGPARM_MAX;
1967 if (TARGET_MMX)
1968 cum->mmx_nregs = MMX_REGPARM_MAX;
1969 cum->warn_sse = true;
1970 cum->warn_mmx = true;
1971 cum->maybe_vaarg = false;
1973 /* Use ecx and edx registers if function has fastcall attribute */
1974 if (fntype && !TARGET_64BIT)
1976 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1978 cum->nregs = 2;
1979 cum->fastcall = 1;
1983 /* Determine if this function has variable arguments. This is
1984 indicated by the last argument being 'void_type_node' if there
1985 are no variable arguments. If there are variable arguments, then
1986 we won't pass anything in registers in 32-bit mode. */
1988 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1990 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1991 param != 0; param = next_param)
1993 next_param = TREE_CHAIN (param);
1994 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1996 if (!TARGET_64BIT)
1998 cum->nregs = 0;
1999 cum->sse_nregs = 0;
2000 cum->mmx_nregs = 0;
2001 cum->warn_sse = 0;
2002 cum->warn_mmx = 0;
2003 cum->fastcall = 0;
2005 cum->maybe_vaarg = true;
2009 if ((!fntype && !libname)
2010 || (fntype && !TYPE_ARG_TYPES (fntype)))
2011 cum->maybe_vaarg = 1;
2013 if (TARGET_DEBUG_ARG)
2014 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2016 return;
2019 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
2020 of this code is to classify each 8-byte chunk of an incoming argument by register
2021 class and assign registers accordingly. */
2023 /* Return the union class of CLASS1 and CLASS2.
2024 See the x86-64 PS ABI for details. */
2026 static enum x86_64_reg_class
2027 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2029 /* Rule #1: If both classes are equal, this is the resulting class. */
2030 if (class1 == class2)
2031 return class1;
2033 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2034 the other class. */
2035 if (class1 == X86_64_NO_CLASS)
2036 return class2;
2037 if (class2 == X86_64_NO_CLASS)
2038 return class1;
2040 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2041 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2042 return X86_64_MEMORY_CLASS;
2044 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2045 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2046 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2047 return X86_64_INTEGERSI_CLASS;
2048 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2049 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2050 return X86_64_INTEGER_CLASS;
2052 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2053 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2054 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2055 return X86_64_MEMORY_CLASS;
2057 /* Rule #6: Otherwise class SSE is used. */
2058 return X86_64_SSE_CLASS;
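/* A few concrete merges, following the rules above:

       merge (NO_CLASS,  SSE)     -> SSE         (rule #2)
       merge (MEMORY,    INTEGER) -> MEMORY      (rule #3)
       merge (INTEGERSI, SSESF)   -> INTEGERSI   (rule #4)
       merge (INTEGER,   SSE)     -> INTEGER     (rule #4)
       merge (X87,       SSE)     -> MEMORY      (rule #5)
       merge (SSESF,     SSEDF)   -> SSE         (rule #6)  */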
2061 /* Classify the argument of type TYPE and mode MODE.
2062 CLASSES will be filled by the register class used to pass each word
2063 of the operand. The number of words is returned. In case the parameter
2064 should be passed in memory, 0 is returned. As a special case for zero
2065 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2067 BIT_OFFSET is used internally for handling records and specifies the
2068 offset within the record, in bits modulo 256, to avoid overflow cases.
2070 See the x86-64 PS ABI for details.
2073 static int
2074 classify_argument (enum machine_mode mode, tree type,
2075 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2077 HOST_WIDE_INT bytes =
2078 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2079 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2081 /* Variable sized entities are always passed/returned in memory. */
2082 if (bytes < 0)
2083 return 0;
2085 if (mode != VOIDmode
2086 && targetm.calls.must_pass_in_stack (mode, type))
2087 return 0;
2089 if (type && AGGREGATE_TYPE_P (type))
2091 int i;
2092 tree field;
2093 enum x86_64_reg_class subclasses[MAX_CLASSES];
2095 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2096 if (bytes > 16)
2097 return 0;
2099 for (i = 0; i < words; i++)
2100 classes[i] = X86_64_NO_CLASS;
2102 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2103 signal the memory class, so handle it as a special case. */
2104 if (!words)
2106 classes[0] = X86_64_NO_CLASS;
2107 return 1;
2110 /* Classify each field of record and merge classes. */
2111 if (TREE_CODE (type) == RECORD_TYPE)
2113 /* For classes first merge in the field of the subclasses. */
2114 if (TYPE_BINFO (type))
2116 tree binfo, base_binfo;
2117 int i;
2119 for (binfo = TYPE_BINFO (type), i = 0;
2120 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2122 int num;
2123 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2124 tree type = BINFO_TYPE (base_binfo);
2126 num = classify_argument (TYPE_MODE (type),
2127 type, subclasses,
2128 (offset + bit_offset) % 256);
2129 if (!num)
2130 return 0;
2131 for (i = 0; i < num; i++)
2133 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2134 classes[i + pos] =
2135 merge_classes (subclasses[i], classes[i + pos]);
2139 /* And now merge the fields of structure. */
2140 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2142 if (TREE_CODE (field) == FIELD_DECL)
2144 int num;
2146 /* Bitfields are always classified as integer. Handle them
2147 early, since later code would consider them to be
2148 misaligned integers. */
2149 if (DECL_BIT_FIELD (field))
2151 for (i = int_bit_position (field) / 8 / 8;
2152 i < (int_bit_position (field)
2153 + tree_low_cst (DECL_SIZE (field), 0)
2154 + 63) / 8 / 8; i++)
2155 classes[i] =
2156 merge_classes (X86_64_INTEGER_CLASS,
2157 classes[i]);
2159 else
2161 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2162 TREE_TYPE (field), subclasses,
2163 (int_bit_position (field)
2164 + bit_offset) % 256);
2165 if (!num)
2166 return 0;
2167 for (i = 0; i < num; i++)
2169 int pos =
2170 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2171 classes[i + pos] =
2172 merge_classes (subclasses[i], classes[i + pos]);
2178 /* Arrays are handled as small records. */
2179 else if (TREE_CODE (type) == ARRAY_TYPE)
2181 int num;
2182 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2183 TREE_TYPE (type), subclasses, bit_offset);
2184 if (!num)
2185 return 0;
2187 /* The partial classes are now full classes. */
2188 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2189 subclasses[0] = X86_64_SSE_CLASS;
2190 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2191 subclasses[0] = X86_64_INTEGER_CLASS;
2193 for (i = 0; i < words; i++)
2194 classes[i] = subclasses[i % num];
2196 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2197 else if (TREE_CODE (type) == UNION_TYPE
2198 || TREE_CODE (type) == QUAL_UNION_TYPE)
2200 /* For classes first merge in the field of the subclasses. */
2201 if (TYPE_BINFO (type))
2203 tree binfo, base_binfo;
2204 int i;
2206 for (binfo = TYPE_BINFO (type), i = 0;
2207 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2209 int num;
2210 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2211 tree type = BINFO_TYPE (base_binfo);
2213 num = classify_argument (TYPE_MODE (type),
2214 type, subclasses,
2215 (offset + (bit_offset % 64)) % 256);
2216 if (!num)
2217 return 0;
2218 for (i = 0; i < num; i++)
2220 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2221 classes[i + pos] =
2222 merge_classes (subclasses[i], classes[i + pos]);
2226 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2228 if (TREE_CODE (field) == FIELD_DECL)
2230 int num;
2231 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2232 TREE_TYPE (field), subclasses,
2233 bit_offset);
2234 if (!num)
2235 return 0;
2236 for (i = 0; i < num; i++)
2237 classes[i] = merge_classes (subclasses[i], classes[i]);
2241 else if (TREE_CODE (type) == SET_TYPE)
2243 if (bytes <= 4)
2245 classes[0] = X86_64_INTEGERSI_CLASS;
2246 return 1;
2248 else if (bytes <= 8)
2250 classes[0] = X86_64_INTEGER_CLASS;
2251 return 1;
2253 else if (bytes <= 12)
2255 classes[0] = X86_64_INTEGER_CLASS;
2256 classes[1] = X86_64_INTEGERSI_CLASS;
2257 return 2;
2259 else
2261 classes[0] = X86_64_INTEGER_CLASS;
2262 classes[1] = X86_64_INTEGER_CLASS;
2263 return 2;
2266 else
2267 abort ();
2269 /* Final merger cleanup. */
2270 for (i = 0; i < words; i++)
2272 /* If one class is MEMORY, everything should be passed in
2273 memory. */
2274 if (classes[i] == X86_64_MEMORY_CLASS)
2275 return 0;
2277 /* The X86_64_SSEUP_CLASS should be always preceded by
2278 X86_64_SSE_CLASS. */
2279 if (classes[i] == X86_64_SSEUP_CLASS
2280 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2281 classes[i] = X86_64_SSE_CLASS;
2283 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2284 if (classes[i] == X86_64_X87UP_CLASS
2285 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2286 classes[i] = X86_64_SSE_CLASS;
2288 return words;
2291 /* Compute the alignment needed.  We align all types to their natural boundaries,
2292 with the exception of XFmode, which is aligned to 64 bits. */
2293 if (mode != VOIDmode && mode != BLKmode)
2295 int mode_alignment = GET_MODE_BITSIZE (mode);
2297 if (mode == XFmode)
2298 mode_alignment = 128;
2299 else if (mode == XCmode)
2300 mode_alignment = 256;
2301 if (COMPLEX_MODE_P (mode))
2302 mode_alignment /= 2;
2303 /* Misaligned fields are always returned in memory. */
2304 if (bit_offset % mode_alignment)
2305 return 0;
2308 /* for V1xx modes, just use the base mode */
2309 if (VECTOR_MODE_P (mode)
2310 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2311 mode = GET_MODE_INNER (mode);
2313 /* Classification of atomic types. */
2314 switch (mode)
2316 case DImode:
2317 case SImode:
2318 case HImode:
2319 case QImode:
2320 case CSImode:
2321 case CHImode:
2322 case CQImode:
2323 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2324 classes[0] = X86_64_INTEGERSI_CLASS;
2325 else
2326 classes[0] = X86_64_INTEGER_CLASS;
2327 return 1;
2328 case CDImode:
2329 case TImode:
2330 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2331 return 2;
2332 case CTImode:
2333 return 0;
2334 case SFmode:
2335 if (!(bit_offset % 64))
2336 classes[0] = X86_64_SSESF_CLASS;
2337 else
2338 classes[0] = X86_64_SSE_CLASS;
2339 return 1;
2340 case DFmode:
2341 classes[0] = X86_64_SSEDF_CLASS;
2342 return 1;
2343 case XFmode:
2344 classes[0] = X86_64_X87_CLASS;
2345 classes[1] = X86_64_X87UP_CLASS;
2346 return 2;
2347 case TFmode:
2348 classes[0] = X86_64_SSE_CLASS;
2349 classes[1] = X86_64_SSEUP_CLASS;
2350 return 2;
2351 case SCmode:
2352 classes[0] = X86_64_SSE_CLASS;
2353 return 1;
2354 case DCmode:
2355 classes[0] = X86_64_SSEDF_CLASS;
2356 classes[1] = X86_64_SSEDF_CLASS;
2357 return 2;
2358 case XCmode:
2359 case TCmode:
2360 /* These modes are larger than 16 bytes. */
2361 return 0;
2362 case V4SFmode:
2363 case V4SImode:
2364 case V16QImode:
2365 case V8HImode:
2366 case V2DFmode:
2367 case V2DImode:
2368 classes[0] = X86_64_SSE_CLASS;
2369 classes[1] = X86_64_SSEUP_CLASS;
2370 return 2;
2371 case V2SFmode:
2372 case V2SImode:
2373 case V4HImode:
2374 case V8QImode:
2375 classes[0] = X86_64_SSE_CLASS;
2376 return 1;
2377 case BLKmode:
2378 case VOIDmode:
2379 return 0;
2380 default:
2381 if (VECTOR_MODE_P (mode))
2383 if (bytes > 16)
2384 return 0;
2385 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2387 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2388 classes[0] = X86_64_INTEGERSI_CLASS;
2389 else
2390 classes[0] = X86_64_INTEGER_CLASS;
2391 classes[1] = X86_64_INTEGER_CLASS;
2392 return 1 + (bytes > 8);
2395 abort ();
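/* A worked example of the classification above (x86-64):

       struct s { double d; int a; int b; };    16 bytes

   The first eightbyte holds the double and is classified SSEDF; the second
   holds the two ints, each classified INTEGER at bit offsets 64 and 96 and
   merged to INTEGER.  The result is { SSEDF, INTEGER }, so the structure is
   passed in one SSE register and one general-purpose register when enough
   registers remain free.  */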
2399 /* Examine the argument and return the number of registers required in each
2400 class.  Return 0 iff the parameter should be passed in memory. */
2401 static int
2402 examine_argument (enum machine_mode mode, tree type, int in_return,
2403 int *int_nregs, int *sse_nregs)
2405 enum x86_64_reg_class class[MAX_CLASSES];
2406 int n = classify_argument (mode, type, class, 0);
2408 *int_nregs = 0;
2409 *sse_nregs = 0;
2410 if (!n)
2411 return 0;
2412 for (n--; n >= 0; n--)
2413 switch (class[n])
2415 case X86_64_INTEGER_CLASS:
2416 case X86_64_INTEGERSI_CLASS:
2417 (*int_nregs)++;
2418 break;
2419 case X86_64_SSE_CLASS:
2420 case X86_64_SSESF_CLASS:
2421 case X86_64_SSEDF_CLASS:
2422 (*sse_nregs)++;
2423 break;
2424 case X86_64_NO_CLASS:
2425 case X86_64_SSEUP_CLASS:
2426 break;
2427 case X86_64_X87_CLASS:
2428 case X86_64_X87UP_CLASS:
2429 if (!in_return)
2430 return 0;
2431 break;
2432 case X86_64_MEMORY_CLASS:
2433 abort ();
2435 return 1;
2437 /* Construct container for the argument used by GCC interface. See
2438 FUNCTION_ARG for the detailed description. */
2439 static rtx
2440 construct_container (enum machine_mode mode, tree type, int in_return,
2441 int nintregs, int nsseregs, const int * intreg,
2442 int sse_regno)
2444 enum machine_mode tmpmode;
2445 int bytes =
2446 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2447 enum x86_64_reg_class class[MAX_CLASSES];
2448 int n;
2449 int i;
2450 int nexps = 0;
2451 int needed_sseregs, needed_intregs;
2452 rtx exp[MAX_CLASSES];
2453 rtx ret;
2455 n = classify_argument (mode, type, class, 0);
2456 if (TARGET_DEBUG_ARG)
2458 if (!n)
2459 fprintf (stderr, "Memory class\n");
2460 else
2462 fprintf (stderr, "Classes:");
2463 for (i = 0; i < n; i++)
2465 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2467 fprintf (stderr, "\n");
2470 if (!n)
2471 return NULL;
2472 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2473 return NULL;
2474 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2475 return NULL;
2477 /* First construct simple cases. Avoid SCmode, since we want to use
2478 single register to pass this type. */
2479 if (n == 1 && mode != SCmode)
2480 switch (class[0])
2482 case X86_64_INTEGER_CLASS:
2483 case X86_64_INTEGERSI_CLASS:
2484 return gen_rtx_REG (mode, intreg[0]);
2485 case X86_64_SSE_CLASS:
2486 case X86_64_SSESF_CLASS:
2487 case X86_64_SSEDF_CLASS:
2488 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2489 case X86_64_X87_CLASS:
2490 return gen_rtx_REG (mode, FIRST_STACK_REG);
2491 case X86_64_NO_CLASS:
2492 /* Zero sized array, struct or class. */
2493 return NULL;
2494 default:
2495 abort ();
2497 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2498 && mode != BLKmode)
2499 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2500 if (n == 2
2501 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2502 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2503 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2504 && class[1] == X86_64_INTEGER_CLASS
2505 && (mode == CDImode || mode == TImode || mode == TFmode)
2506 && intreg[0] + 1 == intreg[1])
2507 return gen_rtx_REG (mode, intreg[0]);
2508 if (n == 4
2509 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2510 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2511 && mode != BLKmode)
2512 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2514 /* Otherwise figure out the entries of the PARALLEL. */
2515 for (i = 0; i < n; i++)
2517 switch (class[i])
2519 case X86_64_NO_CLASS:
2520 break;
2521 case X86_64_INTEGER_CLASS:
2522 case X86_64_INTEGERSI_CLASS:
2523 /* Merge TImodes on aligned occasions here too. */
2524 if (i * 8 + 8 > bytes)
2525 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2526 else if (class[i] == X86_64_INTEGERSI_CLASS)
2527 tmpmode = SImode;
2528 else
2529 tmpmode = DImode;
2530 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2531 if (tmpmode == BLKmode)
2532 tmpmode = DImode;
2533 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2534 gen_rtx_REG (tmpmode, *intreg),
2535 GEN_INT (i*8));
2536 intreg++;
2537 break;
2538 case X86_64_SSESF_CLASS:
2539 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2540 gen_rtx_REG (SFmode,
2541 SSE_REGNO (sse_regno)),
2542 GEN_INT (i*8));
2543 sse_regno++;
2544 break;
2545 case X86_64_SSEDF_CLASS:
2546 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2547 gen_rtx_REG (DFmode,
2548 SSE_REGNO (sse_regno)),
2549 GEN_INT (i*8));
2550 sse_regno++;
2551 break;
2552 case X86_64_SSE_CLASS:
2553 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2554 tmpmode = TImode;
2555 else
2556 tmpmode = DImode;
2557 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2558 gen_rtx_REG (tmpmode,
2559 SSE_REGNO (sse_regno)),
2560 GEN_INT (i*8));
2561 if (tmpmode == TImode)
2562 i++;
2563 sse_regno++;
2564 break;
2565 default:
2566 abort ();
2569 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2570 for (i = 0; i < nexps; i++)
2571 XVECEXP (ret, 0, i) = exp [i];
2572 return ret;
2575 /* Update the data in CUM to advance over an argument
2576 of mode MODE and data type TYPE.
2577 (TYPE is null for libcalls where that information may not be available.) */
2579 void
2580 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2581 enum machine_mode mode, /* current arg mode */
2582 tree type, /* type of the argument or 0 if lib support */
2583 int named) /* whether or not the argument was named */
2585 int bytes =
2586 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2587 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2589 if (TARGET_DEBUG_ARG)
2590 fprintf (stderr,
2591 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2592 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2593 if (TARGET_64BIT)
2595 int int_nregs, sse_nregs;
2596 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2597 cum->words += words;
2598 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2600 cum->nregs -= int_nregs;
2601 cum->sse_nregs -= sse_nregs;
2602 cum->regno += int_nregs;
2603 cum->sse_regno += sse_nregs;
2605 else
2606 cum->words += words;
2608 else
2610 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2611 && (!type || !AGGREGATE_TYPE_P (type)))
2613 cum->sse_words += words;
2614 cum->sse_nregs -= 1;
2615 cum->sse_regno += 1;
2616 if (cum->sse_nregs <= 0)
2618 cum->sse_nregs = 0;
2619 cum->sse_regno = 0;
2622 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2623 && (!type || !AGGREGATE_TYPE_P (type)))
2625 cum->mmx_words += words;
2626 cum->mmx_nregs -= 1;
2627 cum->mmx_regno += 1;
2628 if (cum->mmx_nregs <= 0)
2630 cum->mmx_nregs = 0;
2631 cum->mmx_regno = 0;
2634 else
2636 cum->words += words;
2637 cum->nregs -= words;
2638 cum->regno += words;
2640 if (cum->nregs <= 0)
2642 cum->nregs = 0;
2643 cum->regno = 0;
2647 return;
2650 /* Define where to put the arguments to a function.
2651 Value is zero to push the argument on the stack,
2652 or a hard register in which to store the argument.
2654 MODE is the argument's machine mode.
2655 TYPE is the data type of the argument (as a tree).
2656 This is null for libcalls where that information may
2657 not be available.
2658 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2659 the preceding args and about the function being called.
2660 NAMED is nonzero if this argument is a named parameter
2661 (otherwise it is an extra parameter matching an ellipsis). */
2664 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2665 enum machine_mode mode, /* current arg mode */
2666 tree type, /* type of the argument or 0 if lib support */
2667 int named) /* != 0 for normal args, == 0 for ... args */
2669 rtx ret = NULL_RTX;
2670 int bytes =
2671 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2672 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2673 static bool warnedsse, warnedmmx;
2675 /* Handle a hidden AL argument containing the number of registers for varargs
2676 x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2677 any AL settings. */
2678 if (mode == VOIDmode)
2680 if (TARGET_64BIT)
2681 return GEN_INT (cum->maybe_vaarg
2682 ? (cum->sse_nregs < 0
2683 ? SSE_REGPARM_MAX
2684 : cum->sse_regno)
2685 : -1);
2686 else
2687 return constm1_rtx;
2689 if (TARGET_64BIT)
2690 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2691 &x86_64_int_parameter_registers [cum->regno],
2692 cum->sse_regno);
2693 else
2694 switch (mode)
2696 /* For now, pass fp/complex values on the stack. */
2697 default:
2698 break;
2700 case BLKmode:
2701 if (bytes < 0)
2702 break;
2703 /* FALLTHRU */
2704 case DImode:
2705 case SImode:
2706 case HImode:
2707 case QImode:
2708 if (words <= cum->nregs)
2710 int regno = cum->regno;
2712 /* Fastcall allocates the first two DWORD (SImode) or
2713 smaller arguments to ECX and EDX. */
2714 if (cum->fastcall)
2716 if (mode == BLKmode || mode == DImode)
2717 break;
2719 /* ECX not EAX is the first allocated register. */
2720 if (regno == 0)
2721 regno = 2;
2723 ret = gen_rtx_REG (mode, regno);
2725 break;
2726 case TImode:
2727 case V16QImode:
2728 case V8HImode:
2729 case V4SImode:
2730 case V2DImode:
2731 case V4SFmode:
2732 case V2DFmode:
2733 if (!type || !AGGREGATE_TYPE_P (type))
2735 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2737 warnedsse = true;
2738 warning ("SSE vector argument without SSE enabled "
2739 "changes the ABI");
2741 if (cum->sse_nregs)
2742 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2744 break;
2745 case V8QImode:
2746 case V4HImode:
2747 case V2SImode:
2748 case V2SFmode:
2749 if (!type || !AGGREGATE_TYPE_P (type))
2751 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2753 warnedmmx = true;
2754 warning ("MMX vector argument without MMX enabled "
2755 "changes the ABI");
2757 if (cum->mmx_nregs)
2758 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2760 break;
2763 if (TARGET_DEBUG_ARG)
2765 fprintf (stderr,
2766 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2767 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2769 if (ret)
2770 print_simple_rtl (stderr, ret);
2771 else
2772 fprintf (stderr, ", stack");
2774 fprintf (stderr, " )\n");
2777 return ret;
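/* For the 32-bit fastcall path above, a declaration such as

       int __attribute__ ((fastcall)) f (int a, int b, int c);

   passes a in %ecx (regno 0 is remapped to 2, i.e. ECX) and b in %edx,
   while c no longer fits in the two register slots and is pushed on the
   stack.  */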
2780 /* A C expression that indicates when an argument must be passed by
2781 reference. If nonzero for an argument, a copy of that argument is
2782 made in memory and a pointer to the argument is passed instead of
2783 the argument itself. The pointer is passed in whatever way is
2784 appropriate for passing a pointer to that type. */
2786 static bool
2787 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2788 enum machine_mode mode ATTRIBUTE_UNUSED,
2789 tree type, bool named ATTRIBUTE_UNUSED)
2791 if (!TARGET_64BIT)
2792 return 0;
2794 if (type && int_size_in_bytes (type) == -1)
2796 if (TARGET_DEBUG_ARG)
2797 fprintf (stderr, "function_arg_pass_by_reference\n");
2798 return 1;
2801 return 0;
2804 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2805 passing ABI. */
2806 static bool
2807 contains_128bit_aligned_vector_p (tree type)
2809 enum machine_mode mode = TYPE_MODE (type);
2810 if (SSE_REG_MODE_P (mode)
2811 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2812 return true;
2813 if (TYPE_ALIGN (type) < 128)
2814 return false;
2816 if (AGGREGATE_TYPE_P (type))
2818 /* Walk the aggregates recursively. */
2819 if (TREE_CODE (type) == RECORD_TYPE
2820 || TREE_CODE (type) == UNION_TYPE
2821 || TREE_CODE (type) == QUAL_UNION_TYPE)
2823 tree field;
2825 if (TYPE_BINFO (type))
2827 tree binfo, base_binfo;
2828 int i;
2830 for (binfo = TYPE_BINFO (type), i = 0;
2831 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2832 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2833 return true;
2835 /* And now merge the fields of structure. */
2836 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2838 if (TREE_CODE (field) == FIELD_DECL
2839 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2840 return true;
2843 /* Just in case some languages pass arrays by value. */
2844 else if (TREE_CODE (type) == ARRAY_TYPE)
2846 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2847 return true;
2849 else
2850 abort ();
2852 return false;
2855 /* Gives the alignment boundary, in bits, of an argument with the
2856 specified mode and type. */
2859 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2861 int align;
2862 if (type)
2863 align = TYPE_ALIGN (type);
2864 else
2865 align = GET_MODE_ALIGNMENT (mode);
2866 if (align < PARM_BOUNDARY)
2867 align = PARM_BOUNDARY;
2868 if (!TARGET_64BIT)
2870 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2871 make an exception for SSE modes since these require 128bit
2872 alignment.
2874 The handling here differs from field_alignment. ICC aligns MMX
2875 arguments to 4 byte boundaries, while structure fields are aligned
2876 to 8 byte boundaries. */
2877 if (!TARGET_SSE)
2878 align = PARM_BOUNDARY;
2879 else if (!type)
2881 if (!SSE_REG_MODE_P (mode))
2882 align = PARM_BOUNDARY;
2884 else
2886 if (!contains_128bit_aligned_vector_p (type))
2887 align = PARM_BOUNDARY;
2890 if (align > 128)
2891 align = 128;
2892 return align;
2895 /* Return true if N is a possible register number of function value. */
2896 bool
2897 ix86_function_value_regno_p (int regno)
2899 if (!TARGET_64BIT)
2901 return ((regno) == 0
2902 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2903 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2905 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2906 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2907 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2910 /* Define how to find the value returned by a function.
2911 VALTYPE is the data type of the value (as a tree).
2912 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2913 otherwise, FUNC is 0. */
2915 ix86_function_value (tree valtype)
2917 if (TARGET_64BIT)
2919 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2920 REGPARM_MAX, SSE_REGPARM_MAX,
2921 x86_64_int_return_registers, 0);
2922 /* For zero sized structures, construct_container returns NULL, but we need
2923 to keep the rest of the compiler happy by returning a meaningful value. */
2924 if (!ret)
2925 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2926 return ret;
2928 else
2929 return gen_rtx_REG (TYPE_MODE (valtype),
2930 ix86_value_regno (TYPE_MODE (valtype)));
2933 /* Return false iff type is returned in memory. */
2935 ix86_return_in_memory (tree type)
2937 int needed_intregs, needed_sseregs, size;
2938 enum machine_mode mode = TYPE_MODE (type);
2940 if (TARGET_64BIT)
2941 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2943 if (mode == BLKmode)
2944 return 1;
2946 size = int_size_in_bytes (type);
2948 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2949 return 0;
2951 if (VECTOR_MODE_P (mode) || mode == TImode)
2953 /* User-created vectors small enough to fit in EAX. */
2954 if (size < 8)
2955 return 0;
2957 /* MMX/3dNow values are returned on the stack, since we've
2958 got to EMMS/FEMMS before returning. */
2959 if (size == 8)
2960 return 1;
2962 /* SSE values are returned in XMM0, except when it doesn't exist. */
2963 if (size == 16)
2964 return (TARGET_SSE ? 0 : 1);
2967 if (mode == XFmode)
2968 return 0;
2970 if (size > 12)
2971 return 1;
2972 return 0;
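/* Summarizing the 32-bit cases above with examples (user source):

       typedef int   v2si __attribute__ ((vector_size (8)));
       typedef float v4sf __attribute__ ((vector_size (16)));

   an 8-byte v2si value is returned in memory (we must EMMS before
   returning), a 16-byte v4sf is returned in %xmm0 when SSE is enabled
   and in memory otherwise, and long double (XFmode) is returned in
   %st(0).  */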
2975 /* When returning SSE vector types, we have a choice of either
2976 (1) being abi incompatible with a -march switch, or
2977 (2) generating an error.
2978 Given no good solution, I think the safest thing is one warning.
2979 The user won't be able to use -Werror, but....
2981 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2982 called in response to actually generating a caller or callee that
2983 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2984 via aggregate_value_p for general type probing from tree-ssa. */
2986 static rtx
2987 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2989 static bool warned;
2991 if (!TARGET_SSE && type && !warned)
2993 /* Look at the return type of the function, not the function type. */
2994 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2996 if (mode == TImode
2997 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2999 warned = true;
3000 warning ("SSE vector return without SSE enabled changes the ABI");
3004 return NULL;
3007 /* Define how to find the value returned by a library function
3008 assuming the value has mode MODE. */
3010 ix86_libcall_value (enum machine_mode mode)
3012 if (TARGET_64BIT)
3014 switch (mode)
3016 case SFmode:
3017 case SCmode:
3018 case DFmode:
3019 case DCmode:
3020 case TFmode:
3021 return gen_rtx_REG (mode, FIRST_SSE_REG);
3022 case XFmode:
3023 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3024 case XCmode:
3025 case TCmode:
3026 return NULL;
3027 default:
3028 return gen_rtx_REG (mode, 0);
3031 else
3032 return gen_rtx_REG (mode, ix86_value_regno (mode));
3035 /* Given a mode, return the register to use for a return value. */
3037 static int
3038 ix86_value_regno (enum machine_mode mode)
3040 /* Floating point return values in %st(0). */
3041 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3042 return FIRST_FLOAT_REG;
3043 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3044 we prevent this case when sse is not available. */
3045 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3046 return FIRST_SSE_REG;
3047 /* Everything else in %eax. */
3048 return 0;
3051 /* Create the va_list data type. */
3053 static tree
3054 ix86_build_builtin_va_list (void)
3056 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3058 /* For i386 we use plain pointer to argument area. */
3059 if (!TARGET_64BIT)
3060 return build_pointer_type (char_type_node);
3062 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3063 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3065 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3066 unsigned_type_node);
3067 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3068 unsigned_type_node);
3069 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3070 ptr_type_node);
3071 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3072 ptr_type_node);
3074 DECL_FIELD_CONTEXT (f_gpr) = record;
3075 DECL_FIELD_CONTEXT (f_fpr) = record;
3076 DECL_FIELD_CONTEXT (f_ovf) = record;
3077 DECL_FIELD_CONTEXT (f_sav) = record;
3079 TREE_CHAIN (record) = type_decl;
3080 TYPE_NAME (record) = type_decl;
3081 TYPE_FIELDS (record) = f_gpr;
3082 TREE_CHAIN (f_gpr) = f_fpr;
3083 TREE_CHAIN (f_fpr) = f_ovf;
3084 TREE_CHAIN (f_ovf) = f_sav;
3086 layout_type (record);
3088 /* The correct type is an array type of one element. */
3089 return build_array_type (record, build_index_type (size_zero_node));
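/* The record built above corresponds to the va_list type specified by the
   x86-64 psABI, roughly:

       typedef struct {
         unsigned int gp_offset;          bytes into reg_save_area for GPRs
         unsigned int fp_offset;          bytes into reg_save_area for SSE regs
         void *overflow_arg_area;         stack arguments past the named ones
         void *reg_save_area;             register save block from the prologue
       } __va_list_tag[1];

   The one-element array type is what makes va_list decay to a pointer when
   it is passed to functions such as vfprintf.  */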
3092 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3094 static void
3095 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3096 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3097 int no_rtl)
3099 CUMULATIVE_ARGS next_cum;
3100 rtx save_area = NULL_RTX, mem;
3101 rtx label;
3102 rtx label_ref;
3103 rtx tmp_reg;
3104 rtx nsse_reg;
3105 int set;
3106 tree fntype;
3107 int stdarg_p;
3108 int i;
3110 if (!TARGET_64BIT)
3111 return;
3113 /* Indicate that space on the stack should be allocated for the varargs save area. */
3114 ix86_save_varrargs_registers = 1;
3116 cfun->stack_alignment_needed = 128;
3118 fntype = TREE_TYPE (current_function_decl);
3119 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3120 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3121 != void_type_node));
3123 /* For varargs, we do not want to skip the dummy va_dcl argument.
3124 For stdargs, we do want to skip the last named argument. */
3125 next_cum = *cum;
3126 if (stdarg_p)
3127 function_arg_advance (&next_cum, mode, type, 1);
3129 if (!no_rtl)
3130 save_area = frame_pointer_rtx;
3132 set = get_varargs_alias_set ();
3134 for (i = next_cum.regno; i < ix86_regparm; i++)
3136 mem = gen_rtx_MEM (Pmode,
3137 plus_constant (save_area, i * UNITS_PER_WORD));
3138 set_mem_alias_set (mem, set);
3139 emit_move_insn (mem, gen_rtx_REG (Pmode,
3140 x86_64_int_parameter_registers[i]));
3143 if (next_cum.sse_nregs)
3145 /* Now emit code to save the SSE registers.  The AX parameter contains the number
3146 of SSE parameter registers used to call this function.  We use the
3147 sse_prologue_save insn template, which produces a computed jump across the
3148 SSE saves.  We need some preparation work to get this working. */
3150 label = gen_label_rtx ();
3151 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3153 /* Compute address to jump to :
3154 label - 5*eax + nnamed_sse_arguments*5 */
3155 tmp_reg = gen_reg_rtx (Pmode);
3156 nsse_reg = gen_reg_rtx (Pmode);
3157 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3158 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3159 gen_rtx_MULT (Pmode, nsse_reg,
3160 GEN_INT (4))));
3161 if (next_cum.sse_regno)
3162 emit_move_insn
3163 (nsse_reg,
3164 gen_rtx_CONST (DImode,
3165 gen_rtx_PLUS (DImode,
3166 label_ref,
3167 GEN_INT (next_cum.sse_regno * 4))));
3168 else
3169 emit_move_insn (nsse_reg, label_ref);
3170 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3172 /* Compute the address of the memory block we save into.  We always use a pointer
3173 pointing 127 bytes after the first byte to store - this is needed to keep
3174 the instruction size limited to 4 bytes. */
3175 tmp_reg = gen_reg_rtx (Pmode);
3176 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3177 plus_constant (save_area,
3178 8 * REGPARM_MAX + 127)));
3179 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3180 set_mem_alias_set (mem, set);
3181 set_mem_align (mem, BITS_PER_WORD);
3183 /* And finally do the dirty job! */
3184 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3185 GEN_INT (next_cum.sse_regno), label));
3190 /* Implement va_start. */
3192 void
3193 ix86_va_start (tree valist, rtx nextarg)
3195 HOST_WIDE_INT words, n_gpr, n_fpr;
3196 tree f_gpr, f_fpr, f_ovf, f_sav;
3197 tree gpr, fpr, ovf, sav, t;
3199 /* Only 64bit target needs something special. */
3200 if (!TARGET_64BIT)
3202 std_expand_builtin_va_start (valist, nextarg);
3203 return;
3206 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3207 f_fpr = TREE_CHAIN (f_gpr);
3208 f_ovf = TREE_CHAIN (f_fpr);
3209 f_sav = TREE_CHAIN (f_ovf);
3211 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3212 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3213 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3214 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3215 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3217 /* Count number of gp and fp argument registers used. */
3218 words = current_function_args_info.words;
3219 n_gpr = current_function_args_info.regno;
3220 n_fpr = current_function_args_info.sse_regno;
3222 if (TARGET_DEBUG_ARG)
3223 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3224 (int) words, (int) n_gpr, (int) n_fpr);
3226 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3227 build_int_2 (n_gpr * 8, 0));
3228 TREE_SIDE_EFFECTS (t) = 1;
3229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3231 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3232 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3233 TREE_SIDE_EFFECTS (t) = 1;
3234 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3236 /* Find the overflow area. */
3237 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3238 if (words != 0)
3239 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3240 build_int_2 (words * UNITS_PER_WORD, 0));
3241 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3242 TREE_SIDE_EFFECTS (t) = 1;
3243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3245 /* Find the register save area.
3246 The function prologue saves it right above the stack frame. */
3247 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3248 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3249 TREE_SIDE_EFFECTS (t) = 1;
3250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253 /* Implement va_arg. */
3255 tree
3256 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3258 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3259 tree f_gpr, f_fpr, f_ovf, f_sav;
3260 tree gpr, fpr, ovf, sav, t;
3261 int size, rsize;
3262 tree lab_false, lab_over = NULL_TREE;
3263 tree addr, t2;
3264 rtx container;
3265 int indirect_p = 0;
3266 tree ptrtype;
3268 /* Only 64bit target needs something special. */
3269 if (!TARGET_64BIT)
3270 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3272 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3273 f_fpr = TREE_CHAIN (f_gpr);
3274 f_ovf = TREE_CHAIN (f_fpr);
3275 f_sav = TREE_CHAIN (f_ovf);
3277 valist = build_fold_indirect_ref (valist);
3278 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3279 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3280 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3281 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3283 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3284 if (indirect_p)
3285 type = build_pointer_type (type);
3286 size = int_size_in_bytes (type);
3287 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3289 container = construct_container (TYPE_MODE (type), type, 0,
3290 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3292 /* Pull the value out of the saved registers ...  */
3295 addr = create_tmp_var (ptr_type_node, "addr");
3296 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3298 if (container)
3300 int needed_intregs, needed_sseregs;
3301 bool need_temp;
3302 tree int_addr, sse_addr;
3304 lab_false = create_artificial_label ();
3305 lab_over = create_artificial_label ();
3307 examine_argument (TYPE_MODE (type), type, 0,
3308 &needed_intregs, &needed_sseregs);
3310 need_temp = (!REG_P (container)
3311 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3312 || TYPE_ALIGN (type) > 128));
3314 /* In case we are passing a structure, verify that it is a consecutive block
3315 in the register save area.  If not, we need to do moves. */
3316 if (!need_temp && !REG_P (container))
3318 /* Verify that all registers are strictly consecutive */
3319 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3321 int i;
3323 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3325 rtx slot = XVECEXP (container, 0, i);
3326 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3327 || INTVAL (XEXP (slot, 1)) != i * 16)
3328 need_temp = 1;
3331 else
3333 int i;
3335 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3337 rtx slot = XVECEXP (container, 0, i);
3338 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3339 || INTVAL (XEXP (slot, 1)) != i * 8)
3340 need_temp = 1;
3344 if (!need_temp)
3346 int_addr = addr;
3347 sse_addr = addr;
3349 else
3351 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3352 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3353 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3354 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3356 /* First ensure that we fit completely in registers. */
3357 if (needed_intregs)
3359 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3360 TREE_TYPE (t) = TREE_TYPE (gpr);
3361 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3362 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3363 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3364 gimplify_and_add (t, pre_p);
3366 if (needed_sseregs)
3368 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3369 + REGPARM_MAX * 8, 0);
3370 TREE_TYPE (t) = TREE_TYPE (fpr);
3371 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3372 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3373 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3374 gimplify_and_add (t, pre_p);
3377 /* Compute index to start of area used for integer regs. */
3378 if (needed_intregs)
3380 /* int_addr = gpr + sav; */
3381 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3382 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3383 gimplify_and_add (t, pre_p);
3385 if (needed_sseregs)
3387 /* sse_addr = fpr + sav; */
3388 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3389 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3390 gimplify_and_add (t, pre_p);
3392 if (need_temp)
3394 int i;
3395 tree temp = create_tmp_var (type, "va_arg_tmp");
3397 /* addr = &temp; */
3398 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3399 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3400 gimplify_and_add (t, pre_p);
3402 for (i = 0; i < XVECLEN (container, 0); i++)
3404 rtx slot = XVECEXP (container, 0, i);
3405 rtx reg = XEXP (slot, 0);
3406 enum machine_mode mode = GET_MODE (reg);
3407 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3408 tree addr_type = build_pointer_type (piece_type);
3409 tree src_addr, src;
3410 int src_offset;
3411 tree dest_addr, dest;
3413 if (SSE_REGNO_P (REGNO (reg)))
3415 src_addr = sse_addr;
3416 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3418 else
3420 src_addr = int_addr;
3421 src_offset = REGNO (reg) * 8;
3423 src_addr = fold_convert (addr_type, src_addr);
3424 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3425 size_int (src_offset)));
3426 src = build_fold_indirect_ref (src_addr);
3428 dest_addr = fold_convert (addr_type, addr);
3429 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3430 size_int (INTVAL (XEXP (slot, 1)))));
3431 dest = build_fold_indirect_ref (dest_addr);
3433 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3434 gimplify_and_add (t, pre_p);
3438 if (needed_intregs)
3440 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3441 build_int_2 (needed_intregs * 8, 0));
3442 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3443 gimplify_and_add (t, pre_p);
3445 if (needed_sseregs)
3448 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3449 build_int_2 (needed_sseregs * 16, 0));
3450 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3451 gimplify_and_add (t, pre_p);
3454 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3455 gimplify_and_add (t, pre_p);
3457 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3458 append_to_statement_list (t, pre_p);
3461 /* ... otherwise out of the overflow area. */
3463 /* Care for on-stack alignment if needed. */
3464 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3465 t = ovf;
3466 else
3468 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3469 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3470 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3472 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3474 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3475 gimplify_and_add (t2, pre_p);
3477 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3478 build_int_2 (rsize * UNITS_PER_WORD, 0));
3479 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3480 gimplify_and_add (t, pre_p);
3482 if (container)
3484 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3485 append_to_statement_list (t, pre_p);
3488 ptrtype = build_pointer_type (type);
3489 addr = fold_convert (ptrtype, addr);
3491 if (indirect_p)
3492 addr = build_fold_indirect_ref (addr);
3493 return build_fold_indirect_ref (addr);
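/* A rough sketch of the gimplified control flow produced above for an
   argument that may live in the register save area; GPR_LIMIT, FPR_LIMIT
   and NEED_TEMP stand for the values computed earlier and are placeholders,
   not real identifiers:

       if (gpr >= GPR_LIMIT || fpr >= FPR_LIMIT)
         goto lab_false;
       int_addr = sav + gpr;  sse_addr = sav + fpr;
       ... copy register pieces into a temporary when NEED_TEMP ...
       gpr += needed_intregs * 8;  fpr += needed_sseregs * 16;
       goto lab_over;
     lab_false:
       addr = align (ovf);  ovf = addr + rsize * UNITS_PER_WORD;
     lab_over:
       result = *(TYPE *) addr;
*/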
3496 /* Return nonzero if OP is either an i387 or an SSE fp register. */
3498 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3500 return ANY_FP_REG_P (op);
3503 /* Return nonzero if OP is an i387 fp register. */
3505 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3507 return FP_REG_P (op);
3510 /* Return nonzero if OP is a non-fp register_operand. */
3512 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3514 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3517 /* Return nonzero if OP is a register operand other than an
3518 i387 fp register. */
3520 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3522 return register_operand (op, mode) && !FP_REG_P (op);
3525 /* Return nonzero if OP is a general operand representable on x86_64. */
3528 x86_64_general_operand (rtx op, enum machine_mode mode)
3530 if (!TARGET_64BIT)
3531 return general_operand (op, mode);
3532 if (nonimmediate_operand (op, mode))
3533 return 1;
3534 return x86_64_sign_extended_value (op);
3537 /* Return nonzero if OP is a general operand representable on x86_64
3538 as either a sign-extended or a zero-extended constant. */
3541 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3543 if (!TARGET_64BIT)
3544 return general_operand (op, mode);
3545 if (nonimmediate_operand (op, mode))
3546 return 1;
3547 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3550 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3553 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3555 if (!TARGET_64BIT)
3556 return nonmemory_operand (op, mode);
3557 if (register_operand (op, mode))
3558 return 1;
3559 return x86_64_sign_extended_value (op);
3562 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3565 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3567 if (!TARGET_64BIT || !flag_pic)
3568 return nonmemory_operand (op, mode);
3569 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3570 return 1;
3571 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3572 return 1;
3573 return 0;
3576 /* Return nonzero if OPNUM's MEM should be matched
3577 in movabs* patterns. */
3580 ix86_check_movabs (rtx insn, int opnum)
3582 rtx set, mem;
3584 set = PATTERN (insn);
3585 if (GET_CODE (set) == PARALLEL)
3586 set = XVECEXP (set, 0, 0);
3587 if (GET_CODE (set) != SET)
3588 abort ();
3589 mem = XEXP (set, opnum);
3590 while (GET_CODE (mem) == SUBREG)
3591 mem = SUBREG_REG (mem);
3592 if (GET_CODE (mem) != MEM)
3593 abort ();
3594 return (volatile_ok || !MEM_VOLATILE_P (mem));
3597 /* Return nonzero if OP is a nonmemory operand representable on x86_64,
accepting zero-extended constants as well. */
3600 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3602 if (!TARGET_64BIT)
3603 return nonmemory_operand (op, mode);
3604 if (register_operand (op, mode))
3605 return 1;
3606 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3609 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3612 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3614 if (!TARGET_64BIT)
3615 return immediate_operand (op, mode);
3616 return x86_64_sign_extended_value (op);
3619 /* Return nonzero if OP is an immediate operand representable on x86_64
as a zero-extended value. */
3622 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3624 return x86_64_zero_extended_value (op);
3627 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3628 for shift & compare patterns, as shifting by 0 does not change flags),
3629 else return zero. */
3632 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3634 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3637 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3638 reference and a constant. */
3641 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3643 switch (GET_CODE (op))
3645 case SYMBOL_REF:
3646 case LABEL_REF:
3647 return 1;
3649 case CONST:
3650 op = XEXP (op, 0);
3651 if (GET_CODE (op) == SYMBOL_REF
3652 || GET_CODE (op) == LABEL_REF
3653 || (GET_CODE (op) == UNSPEC
3654 && (XINT (op, 1) == UNSPEC_GOT
3655 || XINT (op, 1) == UNSPEC_GOTOFF
3656 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3657 return 1;
3658 if (GET_CODE (op) != PLUS
3659 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3660 return 0;
3662 op = XEXP (op, 0);
3663 if (GET_CODE (op) == SYMBOL_REF
3664 || GET_CODE (op) == LABEL_REF)
3665 return 1;
3666 /* Only @GOTOFF gets offsets. */
3667 if (GET_CODE (op) != UNSPEC
3668 || XINT (op, 1) != UNSPEC_GOTOFF)
3669 return 0;
3671 op = XVECEXP (op, 0, 0);
3672 if (GET_CODE (op) == SYMBOL_REF
3673 || GET_CODE (op) == LABEL_REF)
3674 return 1;
3675 return 0;
3677 default:
3678 return 0;
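/* For illustration, some RTL shapes accepted by symbolic_operand (the
   symbol names are made up; only the forms matter):

     (symbol_ref "foo")
     (label_ref ...)
     (const (plus (symbol_ref "foo") (const_int 4)))
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   Anything else, e.g. a bare (const_int 4) or a register, is rejected.  */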
3682 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3685 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3687 if (GET_CODE (op) != CONST)
3688 return 0;
3689 op = XEXP (op, 0);
3690 if (TARGET_64BIT)
3692 if (GET_CODE (op) == UNSPEC
3693 && XINT (op, 1) == UNSPEC_GOTPCREL)
3694 return 1;
3695 if (GET_CODE (op) == PLUS
3696 && GET_CODE (XEXP (op, 0)) == UNSPEC
3697 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3698 return 1;
3700 else
3702 if (GET_CODE (op) == UNSPEC)
3703 return 1;
3704 if (GET_CODE (op) != PLUS
3705 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3706 return 0;
3707 op = XEXP (op, 0);
3708 if (GET_CODE (op) == UNSPEC)
3709 return 1;
3711 return 0;
3714 /* Return true if OP is a symbolic operand that resolves locally. */
3716 static int
3717 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3719 if (GET_CODE (op) == CONST
3720 && GET_CODE (XEXP (op, 0)) == PLUS
3721 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3722 op = XEXP (XEXP (op, 0), 0);
3724 if (GET_CODE (op) == LABEL_REF)
3725 return 1;
3727 if (GET_CODE (op) != SYMBOL_REF)
3728 return 0;
3730 if (SYMBOL_REF_LOCAL_P (op))
3731 return 1;
3733 /* There is, however, a not insubstantial body of code in the rest of
3734 the compiler that assumes it can just stick the results of
3735 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3736 /* ??? This is a hack. Should update the body of the compiler to
3737 always create a DECL and invoke targetm.encode_section_info. */
3738 if (strncmp (XSTR (op, 0), internal_label_prefix,
3739 internal_label_prefix_len) == 0)
3740 return 1;
3742 return 0;
3745 /* Test for various thread-local symbols. */
3748 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3750 if (GET_CODE (op) != SYMBOL_REF)
3751 return 0;
3752 return SYMBOL_REF_TLS_MODEL (op);
3755 static inline int
3756 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3758 if (GET_CODE (op) != SYMBOL_REF)
3759 return 0;
3760 return SYMBOL_REF_TLS_MODEL (op) == kind;
3764 global_dynamic_symbolic_operand (rtx op,
3765 enum machine_mode mode ATTRIBUTE_UNUSED)
3767 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3771 local_dynamic_symbolic_operand (rtx op,
3772 enum machine_mode mode ATTRIBUTE_UNUSED)
3774 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3778 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3780 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3784 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3786 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3789 /* Test for a valid operand for a call instruction. Don't allow the
3790 arg pointer register or virtual regs since they may decay into
3791 reg + const, which the patterns can't handle. */
3794 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3796 /* Disallow indirect through a virtual register. This leads to
3797 compiler aborts when trying to eliminate them. */
3798 if (GET_CODE (op) == REG
3799 && (op == arg_pointer_rtx
3800 || op == frame_pointer_rtx
3801 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3802 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3803 return 0;
3805 /* Disallow `call 1234'. Due to varying assembler lameness this
3806 gets either rejected or translated to `call .+1234'. */
3807 if (GET_CODE (op) == CONST_INT)
3808 return 0;
3810 /* Explicitly allow SYMBOL_REF even if pic. */
3811 if (GET_CODE (op) == SYMBOL_REF)
3812 return 1;
3814 /* Otherwise we can allow any general_operand in the address. */
3815 return general_operand (op, Pmode);
3818 /* Test for a valid operand for a sibling call instruction. Don't allow the
3819 arg pointer register or virtual regs since they may decay into
3820 reg + const, which the patterns can't handle. */
3823 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3825 /* Disallow indirect through a virtual register. This leads to
3826 compiler aborts when trying to eliminate them. */
3827 if (GET_CODE (op) == REG
3828 && (op == arg_pointer_rtx
3829 || op == frame_pointer_rtx
3830 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3831 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3832 return 0;
3834 /* Explicitly allow SYMBOL_REF even if pic. */
3835 if (GET_CODE (op) == SYMBOL_REF)
3836 return 1;
3838 /* Otherwise we can only allow register operands. */
3839 return register_operand (op, Pmode);
3843 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3845 if (GET_CODE (op) == CONST
3846 && GET_CODE (XEXP (op, 0)) == PLUS
3847 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3848 op = XEXP (XEXP (op, 0), 0);
3849 return GET_CODE (op) == SYMBOL_REF;
3852 /* Match exactly zero and one. */
3855 const0_operand (rtx op, enum machine_mode mode)
3857 return op == CONST0_RTX (mode);
3861 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3863 return op == const1_rtx;
3866 /* Match 2, 4, or 8. Used for leal multiplicands. */
3869 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3871 return (GET_CODE (op) == CONST_INT
3872 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
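/* Example: const248_operand matches the scale of an address such as
   (plus (reg) (mult (reg) (const_int 4))), which assembles to something
   like "leal (%eax,%ebx,4), %ecx"; the register choices here are only
   illustrative.  */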
3876 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3878 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3882 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3884 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3888 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3890 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3894 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3896 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3900 /* True if this is a constant appropriate for an increment or decrement. */
3903 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3905 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3906 flags register, since the carry flag is not set. */
3907 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3908 return 0;
3909 return op == const1_rtx || op == constm1_rtx;
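/* In practice this means that on Pentium 4 / Nocona (and unless we optimize
   for size) an increment is expanded as "addl $1, %eax" rather than
   "incl %eax", since inc/dec leave the carry flag untouched and create a
   partial flags dependency; the register name is only an example.  */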
3912 /* Return nonzero if OP is acceptable as operand of DImode shift
3913 expander. */
3916 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3918 if (TARGET_64BIT)
3919 return nonimmediate_operand (op, mode);
3920 else
3921 return register_operand (op, mode);
3924 /* Return false if this is the stack pointer, or any other fake
3925 register eliminable to the stack pointer. Otherwise, this is
3926 a register operand.
3928 This is used to prevent esp from being used as an index reg,
3929 which would only happen in pathological cases. */
3932 reg_no_sp_operand (rtx op, enum machine_mode mode)
3934 rtx t = op;
3935 if (GET_CODE (t) == SUBREG)
3936 t = SUBREG_REG (t);
3937 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3938 return 0;
3940 return register_operand (op, mode);
3944 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3946 return MMX_REG_P (op);
3949 /* Return false if this is any eliminable register. Otherwise
3950 general_operand. */
3953 general_no_elim_operand (rtx op, enum machine_mode mode)
3955 rtx t = op;
3956 if (GET_CODE (t) == SUBREG)
3957 t = SUBREG_REG (t);
3958 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3959 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3960 || t == virtual_stack_dynamic_rtx)
3961 return 0;
3962 if (REG_P (t)
3963 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3964 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3965 return 0;
3967 return general_operand (op, mode);
3970 /* Return false if this is any eliminable register. Otherwise
3971 register_operand or const_int. */
3974 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3976 rtx t = op;
3977 if (GET_CODE (t) == SUBREG)
3978 t = SUBREG_REG (t);
3979 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3980 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3981 || t == virtual_stack_dynamic_rtx)
3982 return 0;
3984 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3987 /* Return false if this is any eliminable register or stack register,
3988 otherwise work like register_operand. */
3991 index_register_operand (rtx op, enum machine_mode mode)
3993 rtx t = op;
3994 if (GET_CODE (t) == SUBREG)
3995 t = SUBREG_REG (t);
3996 if (!REG_P (t))
3997 return 0;
3998 if (t == arg_pointer_rtx
3999 || t == frame_pointer_rtx
4000 || t == virtual_incoming_args_rtx
4001 || t == virtual_stack_vars_rtx
4002 || t == virtual_stack_dynamic_rtx
4003 || REGNO (t) == STACK_POINTER_REGNUM)
4004 return 0;
4006 return general_operand (op, mode);
4009 /* Return true if op is a Q_REGS class register. */
4012 q_regs_operand (rtx op, enum machine_mode mode)
4014 if (mode != VOIDmode && GET_MODE (op) != mode)
4015 return 0;
4016 if (GET_CODE (op) == SUBREG)
4017 op = SUBREG_REG (op);
4018 return ANY_QI_REG_P (op);
4021 /* Return true if op is a flags register. */
4024 flags_reg_operand (rtx op, enum machine_mode mode)
4026 if (mode != VOIDmode && GET_MODE (op) != mode)
4027 return 0;
4028 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
4031 /* Return true if op is a NON_Q_REGS class register. */
4034 non_q_regs_operand (rtx op, enum machine_mode mode)
4036 if (mode != VOIDmode && GET_MODE (op) != mode)
4037 return 0;
4038 if (GET_CODE (op) == SUBREG)
4039 op = SUBREG_REG (op);
4040 return NON_QI_REG_P (op);
4044 zero_extended_scalar_load_operand (rtx op,
4045 enum machine_mode mode ATTRIBUTE_UNUSED)
4047 unsigned n_elts;
4048 if (GET_CODE (op) != MEM)
4049 return 0;
4050 op = maybe_get_pool_constant (op);
4051 if (!op)
4052 return 0;
4053 if (GET_CODE (op) != CONST_VECTOR)
4054 return 0;
4055 n_elts =
4056 (GET_MODE_SIZE (GET_MODE (op)) /
4057 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4058 for (n_elts--; n_elts > 0; n_elts--)
4060 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4061 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4062 return 0;
4064 return 1;
4067 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
4069 vector_move_operand (rtx op, enum machine_mode mode)
4071 if (nonimmediate_operand (op, mode))
4072 return 1;
4073 if (GET_MODE (op) != mode && mode != VOIDmode)
4074 return 0;
4075 return (op == CONST0_RTX (GET_MODE (op)));
4078 /* Return true if op is a valid address that does not contain
4079 a segment override. */
4082 no_seg_address_operand (rtx op, enum machine_mode mode)
4084 struct ix86_address parts;
4086 if (! address_operand (op, mode))
4087 return 0;
4089 if (! ix86_decompose_address (op, &parts))
4090 abort ();
4092 return parts.seg == SEG_DEFAULT;
4095 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4096 insns. */
4098 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4100 enum rtx_code code = GET_CODE (op);
4101 switch (code)
4103 /* Operations supported directly. */
4104 case EQ:
4105 case LT:
4106 case LE:
4107 case UNORDERED:
4108 case NE:
4109 case UNGE:
4110 case UNGT:
4111 case ORDERED:
4112 return 1;
4113 /* These are equivalent to ones above in non-IEEE comparisons. */
4114 case UNEQ:
4115 case UNLT:
4116 case UNLE:
4117 case LTGT:
4118 case GE:
4119 case GT:
4120 return !TARGET_IEEE_FP;
4121 default:
4122 return 0;
4125 /* Return 1 if OP is a valid comparison operator in valid mode. */
4127 ix86_comparison_operator (rtx op, enum machine_mode mode)
4129 enum machine_mode inmode;
4130 enum rtx_code code = GET_CODE (op);
4131 if (mode != VOIDmode && GET_MODE (op) != mode)
4132 return 0;
4133 if (!COMPARISON_P (op))
4134 return 0;
4135 inmode = GET_MODE (XEXP (op, 0));
4137 if (inmode == CCFPmode || inmode == CCFPUmode)
4139 enum rtx_code second_code, bypass_code;
4140 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4141 return (bypass_code == NIL && second_code == NIL);
4143 switch (code)
4145 case EQ: case NE:
4146 return 1;
4147 case LT: case GE:
4148 if (inmode == CCmode || inmode == CCGCmode
4149 || inmode == CCGOCmode || inmode == CCNOmode)
4150 return 1;
4151 return 0;
4152 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4153 if (inmode == CCmode)
4154 return 1;
4155 return 0;
4156 case GT: case LE:
4157 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4158 return 1;
4159 return 0;
4160 default:
4161 return 0;
4165 /* Return 1 if OP is a valid comparison operator testing whether the
4166 carry flag is set. */
4168 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4170 enum machine_mode inmode;
4171 enum rtx_code code = GET_CODE (op);
4173 if (mode != VOIDmode && GET_MODE (op) != mode)
4174 return 0;
4175 if (!COMPARISON_P (op))
4176 return 0;
4177 inmode = GET_MODE (XEXP (op, 0));
4178 if (GET_CODE (XEXP (op, 0)) != REG
4179 || REGNO (XEXP (op, 0)) != 17
4180 || XEXP (op, 1) != const0_rtx)
4181 return 0;
4183 if (inmode == CCFPmode || inmode == CCFPUmode)
4185 enum rtx_code second_code, bypass_code;
4187 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4188 if (bypass_code != NIL || second_code != NIL)
4189 return 0;
4190 code = ix86_fp_compare_code_to_integer (code);
4192 else if (inmode != CCmode)
4193 return 0;
4194 return code == LTU;
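/* In other words, the operator must be a comparison of the flags register
   (hard register 17) against zero that reduces to LTU, i.e. "carry set";
   a typical accepted form is
     (ltu (reg:CC 17) (const_int 0))
   This is presumably what the adc/sbb style patterns key on.  */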
4197 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4200 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4202 enum machine_mode inmode;
4203 enum rtx_code code = GET_CODE (op);
4205 if (mode != VOIDmode && GET_MODE (op) != mode)
4206 return 0;
4207 if (!COMPARISON_P (op))
4208 return 0;
4209 inmode = GET_MODE (XEXP (op, 0));
4210 if (inmode == CCFPmode || inmode == CCFPUmode)
4212 enum rtx_code second_code, bypass_code;
4214 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4215 if (bypass_code != NIL || second_code != NIL)
4216 return 0;
4217 code = ix86_fp_compare_code_to_integer (code);
4219 /* The i387 supports only a limited set of condition codes. */
4220 switch (code)
4222 case LTU: case GTU: case LEU: case GEU:
4223 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4224 return 1;
4225 return 0;
4226 case ORDERED: case UNORDERED:
4227 case EQ: case NE:
4228 return 1;
4229 default:
4230 return 0;
4234 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4237 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4239 switch (GET_CODE (op))
4241 case MULT:
4242 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4243 but the 386 and 486 do HImode multiplies faster. */
4244 return ix86_tune > PROCESSOR_I486;
4245 case PLUS:
4246 case AND:
4247 case IOR:
4248 case XOR:
4249 case ASHIFT:
4250 return 1;
4251 default:
4252 return 0;
4256 /* Nearly general operand, but accept any const_double, since we wish
4257 to be able to drop them into memory rather than have them get pulled
4258 into registers. */
4261 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4263 if (mode != VOIDmode && mode != GET_MODE (op))
4264 return 0;
4265 if (GET_CODE (op) == CONST_DOUBLE)
4266 return 1;
4267 return general_operand (op, mode);
4270 /* Match an SI or HImode register for a zero_extract. */
4273 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4275 int regno;
4276 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4277 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4278 return 0;
4280 if (!register_operand (op, VOIDmode))
4281 return 0;
4283 /* Be careful to accept only registers having upper parts. */
4284 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4285 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
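/* Here "registers having upper parts" means hard registers 0-3
   (%eax, %edx, %ecx, %ebx), whose high bytes (%ah, %dh, %ch, %bh) can be
   named by a zero_extract; pseudo registers (regno > LAST_VIRTUAL_REGISTER)
   are accepted and constrained later by register allocation.  */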
4288 /* Return 1 if this is a valid binary floating-point operation.
4289 OP is the expression matched, and MODE is its mode. */
4292 binary_fp_operator (rtx op, enum machine_mode mode)
4294 if (mode != VOIDmode && mode != GET_MODE (op))
4295 return 0;
4297 switch (GET_CODE (op))
4299 case PLUS:
4300 case MINUS:
4301 case MULT:
4302 case DIV:
4303 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4305 default:
4306 return 0;
4311 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4313 return GET_CODE (op) == MULT;
4317 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4319 return GET_CODE (op) == DIV;
4323 arith_or_logical_operator (rtx op, enum machine_mode mode)
4325 return ((mode == VOIDmode || GET_MODE (op) == mode)
4326 && ARITHMETIC_P (op));
4329 /* Returns 1 if OP is a memory operand with a displacement. */
4332 memory_displacement_operand (rtx op, enum machine_mode mode)
4334 struct ix86_address parts;
4336 if (! memory_operand (op, mode))
4337 return 0;
4339 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4340 abort ();
4342 return parts.disp != NULL_RTX;
4345 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4346 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4348 ??? It seems likely that this will only work because cmpsi is an
4349 expander, and no actual insns use this. */
4352 cmpsi_operand (rtx op, enum machine_mode mode)
4354 if (nonimmediate_operand (op, mode))
4355 return 1;
4357 if (GET_CODE (op) == AND
4358 && GET_MODE (op) == SImode
4359 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4360 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4361 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4362 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4363 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4364 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4365 return 1;
4367 return 0;
4370 /* Returns 1 if OP is a memory operand that cannot be represented by the
4371 modRM array. */
4374 long_memory_operand (rtx op, enum machine_mode mode)
4376 if (! memory_operand (op, mode))
4377 return 0;
4379 return memory_address_length (op) != 0;
4382 /* Return nonzero if the rtx is known to be aligned. */
4385 aligned_operand (rtx op, enum machine_mode mode)
4387 struct ix86_address parts;
4389 if (!general_operand (op, mode))
4390 return 0;
4392 /* Registers and immediate operands are always "aligned". */
4393 if (GET_CODE (op) != MEM)
4394 return 1;
4396 /* Don't even try to do any aligned optimizations with volatiles. */
4397 if (MEM_VOLATILE_P (op))
4398 return 0;
4400 op = XEXP (op, 0);
4402 /* Pushes and pops are only valid on the stack pointer. */
4403 if (GET_CODE (op) == PRE_DEC
4404 || GET_CODE (op) == POST_INC)
4405 return 1;
4407 /* Decode the address. */
4408 if (! ix86_decompose_address (op, &parts))
4409 abort ();
4411 /* Look for some component that isn't known to be aligned. */
4412 if (parts.index)
4414 if (parts.scale < 4
4415 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4416 return 0;
4418 if (parts.base)
4420 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4421 return 0;
4423 if (parts.disp)
4425 if (GET_CODE (parts.disp) != CONST_INT
4426 || (INTVAL (parts.disp) & 3) != 0)
4427 return 0;
4430 /* Didn't find one -- this must be an aligned address. */
4431 return 1;
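/* Summing up the checks above: a memory operand counts as aligned when any
   index register is either scaled by at least 4 or known to be at least
   32-bit aligned, any base register is known to be at least 32-bit aligned,
   and any constant displacement is a multiple of 4; e.g. an odd displacement
   alone is enough to disqualify the address.  */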
4434 /* Initialize the table of extra 80387 mathematical constants. */
4436 static void
4437 init_ext_80387_constants (void)
4439 static const char * cst[5] =
4441 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4442 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4443 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4444 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4445 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4447 int i;
4449 for (i = 0; i < 5; i++)
4451 real_from_string (&ext_80387_constants_table[i], cst[i]);
4452 /* Ensure each constant is rounded to XFmode precision. */
4453 real_convert (&ext_80387_constants_table[i],
4454 XFmode, &ext_80387_constants_table[i]);
4457 ext_80387_constants_init = 1;
4460 /* Return true if the constant is something that can be loaded with
4461 a special instruction. */
4464 standard_80387_constant_p (rtx x)
4466 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4467 return -1;
4469 if (x == CONST0_RTX (GET_MODE (x)))
4470 return 1;
4471 if (x == CONST1_RTX (GET_MODE (x)))
4472 return 2;
4474 /* For XFmode constants, try to find a special 80387 instruction when
4475 optimizing for size or on those CPUs that benefit from them. */
4476 if (GET_MODE (x) == XFmode
4477 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4479 REAL_VALUE_TYPE r;
4480 int i;
4482 if (! ext_80387_constants_init)
4483 init_ext_80387_constants ();
4485 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4486 for (i = 0; i < 5; i++)
4487 if (real_identical (&r, &ext_80387_constants_table[i]))
4488 return i + 3;
4491 return 0;
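/* For reference, the return values of standard_80387_constant_p map onto
   load instructions as follows (see standard_80387_constant_opcode below):
     1 -> fldz   2 -> fld1    3 -> fldlg2   4 -> fldln2
     5 -> fldl2e 6 -> fldl2t  7 -> fldpi
   0 and -1 both mean that no special load instruction applies.  */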
4494 /* Return the opcode of the special instruction to be used to load
4495 the constant X. */
4497 const char *
4498 standard_80387_constant_opcode (rtx x)
4500 switch (standard_80387_constant_p (x))
4502 case 1:
4503 return "fldz";
4504 case 2:
4505 return "fld1";
4506 case 3:
4507 return "fldlg2";
4508 case 4:
4509 return "fldln2";
4510 case 5:
4511 return "fldl2e";
4512 case 6:
4513 return "fldl2t";
4514 case 7:
4515 return "fldpi";
4517 abort ();
4520 /* Return the CONST_DOUBLE representing the 80387 constant that is
4521 loaded by the specified special instruction. The argument IDX
4522 matches the return value from standard_80387_constant_p. */
4525 standard_80387_constant_rtx (int idx)
4527 int i;
4529 if (! ext_80387_constants_init)
4530 init_ext_80387_constants ();
4532 switch (idx)
4534 case 3:
4535 case 4:
4536 case 5:
4537 case 6:
4538 case 7:
4539 i = idx - 3;
4540 break;
4542 default:
4543 abort ();
4546 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4547 XFmode);
4550 /* Return 1 if X is an FP constant that we can load into an SSE register
without using memory. */
4553 standard_sse_constant_p (rtx x)
4555 if (x == const0_rtx)
4556 return 1;
4557 return (x == CONST0_RTX (GET_MODE (x)));
4560 /* Returns 1 if OP contains a symbol reference */
4563 symbolic_reference_mentioned_p (rtx op)
4565 const char *fmt;
4566 int i;
4568 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4569 return 1;
4571 fmt = GET_RTX_FORMAT (GET_CODE (op));
4572 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4574 if (fmt[i] == 'E')
4576 int j;
4578 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4579 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4580 return 1;
4583 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4584 return 1;
4587 return 0;
4590 /* Return 1 if it is appropriate to emit `ret' instructions in the
4591 body of a function. Do this only if the epilogue is simple, needing a
4592 couple of insns. Prior to reloading, we can't tell how many registers
4593 must be saved, so return 0 then. Return 0 if there is no frame
4594 marker to de-allocate.
4596 If NON_SAVING_SETJMP is defined and true, then it is not possible
4597 for the epilogue to be simple, so return 0. This is a special case
4598 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4599 until final, but jump_optimize may need to know sooner if a
4600 `return' is OK. */
4603 ix86_can_use_return_insn_p (void)
4605 struct ix86_frame frame;
4607 #ifdef NON_SAVING_SETJMP
4608 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4609 return 0;
4610 #endif
4612 if (! reload_completed || frame_pointer_needed)
4613 return 0;
4615 /* Don't allow more than 32768 bytes to be popped, since that's all we can
4616 do with one instruction. */
4617 if (current_function_pops_args
4618 && current_function_args_size >= 32768)
4619 return 0;
4621 ix86_compute_frame_layout (&frame);
4622 return frame.to_allocate == 0 && frame.nregs == 0;
4625 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4627 x86_64_sign_extended_value (rtx value)
4629 switch (GET_CODE (value))
4631 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4632 to be at least 32, so all acceptable constants are
4633 represented as CONST_INT. */
4634 case CONST_INT:
4635 if (HOST_BITS_PER_WIDE_INT == 32)
4636 return 1;
4637 else
4639 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4640 return trunc_int_for_mode (val, SImode) == val;
4642 break;
4644 /* For certain code models, the symbolic references are known to fit.
4645 In the CM_SMALL_PIC model we know they fit if they are local to the shared
4646 library. Don't count TLS SYMBOL_REFs here, since they should fit
4647 only if inside of an UNSPEC handled below. */
4648 case SYMBOL_REF:
4649 /* TLS symbols are not constant. */
4650 if (tls_symbolic_operand (value, Pmode))
4651 return false;
4652 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4654 /* For certain code models, the code is near as well. */
4655 case LABEL_REF:
4656 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4657 || ix86_cmodel == CM_KERNEL);
4659 /* We also may accept the offsetted memory references in certain special
4660 cases. */
4661 case CONST:
4662 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4663 switch (XINT (XEXP (value, 0), 1))
4665 case UNSPEC_GOTPCREL:
4666 case UNSPEC_DTPOFF:
4667 case UNSPEC_GOTNTPOFF:
4668 case UNSPEC_NTPOFF:
4669 return 1;
4670 default:
4671 break;
4673 if (GET_CODE (XEXP (value, 0)) == PLUS)
4675 rtx op1 = XEXP (XEXP (value, 0), 0);
4676 rtx op2 = XEXP (XEXP (value, 0), 1);
4677 HOST_WIDE_INT offset;
4679 if (ix86_cmodel == CM_LARGE)
4680 return 0;
4681 if (GET_CODE (op2) != CONST_INT)
4682 return 0;
4683 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4684 switch (GET_CODE (op1))
4686 case SYMBOL_REF:
4687 /* For CM_SMALL assume that the latest object is 16MB below
4688 the end of the 31-bit boundary. We may also accept pretty
4689 large negative constants, knowing that all objects are
4690 in the positive half of the address space. */
4691 if (ix86_cmodel == CM_SMALL
4692 && offset < 16*1024*1024
4693 && trunc_int_for_mode (offset, SImode) == offset)
4694 return 1;
4695 /* For CM_KERNEL we know that all objects reside in the
4696 negative half of the 32-bit address space. We may not
4697 accept negative offsets, since they may be just off,
4698 and we may accept pretty large positive ones. */
4699 if (ix86_cmodel == CM_KERNEL
4700 && offset > 0
4701 && trunc_int_for_mode (offset, SImode) == offset)
4702 return 1;
4703 break;
4704 case LABEL_REF:
4705 /* These conditions are similar to SYMBOL_REF ones, just the
4706 constraints for code models differ. */
4707 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4708 && offset < 16*1024*1024
4709 && trunc_int_for_mode (offset, SImode) == offset)
4710 return 1;
4711 if (ix86_cmodel == CM_KERNEL
4712 && offset > 0
4713 && trunc_int_for_mode (offset, SImode) == offset)
4714 return 1;
4715 break;
4716 case UNSPEC:
4717 switch (XINT (op1, 1))
4719 case UNSPEC_DTPOFF:
4720 case UNSPEC_NTPOFF:
4721 if (offset > 0
4722 && trunc_int_for_mode (offset, SImode) == offset)
4723 return 1;
4725 break;
4726 default:
4727 return 0;
4730 return 0;
4731 default:
4732 return 0;
4736 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4738 x86_64_zero_extended_value (rtx value)
4740 switch (GET_CODE (value))
4742 case CONST_DOUBLE:
4743 if (HOST_BITS_PER_WIDE_INT == 32)
4744 return (GET_MODE (value) == VOIDmode
4745 && !CONST_DOUBLE_HIGH (value));
4746 else
4747 return 0;
4748 case CONST_INT:
4749 if (HOST_BITS_PER_WIDE_INT == 32)
4750 return INTVAL (value) >= 0;
4751 else
4752 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4753 break;
4755 /* For certain code models, the symbolic references are known to fit. */
4756 case SYMBOL_REF:
4757 /* TLS symbols are not constant. */
4758 if (tls_symbolic_operand (value, Pmode))
4759 return false;
4760 return ix86_cmodel == CM_SMALL;
4762 /* For certain code models, the code is near as well. */
4763 case LABEL_REF:
4764 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4766 /* We also may accept the offsetted memory references in certain special
4767 cases. */
4768 case CONST:
4769 if (GET_CODE (XEXP (value, 0)) == PLUS)
4771 rtx op1 = XEXP (XEXP (value, 0), 0);
4772 rtx op2 = XEXP (XEXP (value, 0), 1);
4774 if (ix86_cmodel == CM_LARGE)
4775 return 0;
4776 switch (GET_CODE (op1))
4778 case SYMBOL_REF:
4779 return 0;
4780 /* For the small code model we may accept pretty large positive
4781 offsets, since one bit is available for free. Negative
4782 offsets are limited by the size of the NULL pointer area
4783 specified by the ABI. */
4784 if (ix86_cmodel == CM_SMALL
4785 && GET_CODE (op2) == CONST_INT
4786 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4787 && (trunc_int_for_mode (INTVAL (op2), SImode)
4788 == INTVAL (op2)))
4789 return 1;
4790 /* ??? For the kernel, we may accept adjustment of
4791 -0x10000000, since we know that it will just convert
4792 negative address space to positive, but perhaps this
4793 is not worthwhile. */
4794 break;
4795 case LABEL_REF:
4796 /* These conditions are similar to SYMBOL_REF ones, just the
4797 constraints for code models differ. */
4798 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4799 && GET_CODE (op2) == CONST_INT
4800 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4801 && (trunc_int_for_mode (INTVAL (op2), SImode)
4802 == INTVAL (op2)))
4803 return 1;
4804 break;
4805 default:
4806 return 0;
4809 return 0;
4810 default:
4811 return 0;
4815 /* Value should be nonzero if functions must have frame pointers.
4816 Zero means the frame pointer need not be set up (and parms may
4817 be accessed via the stack pointer) in functions that seem suitable. */
4820 ix86_frame_pointer_required (void)
4822 /* If we accessed previous frames, then the generated code expects
4823 to be able to access the saved ebp value in our frame. */
4824 if (cfun->machine->accesses_prev_frame)
4825 return 1;
4827 /* Several x86 os'es need a frame pointer for other reasons,
4828 usually pertaining to setjmp. */
4829 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4830 return 1;
4832 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4833 the frame pointer by default. Turn it back on now if we've not
4834 got a leaf function. */
4835 if (TARGET_OMIT_LEAF_FRAME_POINTER
4836 && (!current_function_is_leaf))
4837 return 1;
4839 if (current_function_profile)
4840 return 1;
4842 return 0;
4845 /* Record that the current function accesses previous call frames. */
4847 void
4848 ix86_setup_frame_addresses (void)
4850 cfun->machine->accesses_prev_frame = 1;
4853 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4854 # define USE_HIDDEN_LINKONCE 1
4855 #else
4856 # define USE_HIDDEN_LINKONCE 0
4857 #endif
4859 static int pic_labels_used;
4861 /* Fills in the label name that should be used for a pc thunk for
4862 the given register. */
4864 static void
4865 get_pc_thunk_name (char name[32], unsigned int regno)
4867 if (USE_HIDDEN_LINKONCE)
4868 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4869 else
4870 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4874 /* This function generates code for -fpic that loads %ebx with
4875 the return address of the caller and then returns. */
4877 void
4878 ix86_file_end (void)
4880 rtx xops[2];
4881 int regno;
4883 for (regno = 0; regno < 8; ++regno)
4885 char name[32];
4887 if (! ((pic_labels_used >> regno) & 1))
4888 continue;
4890 get_pc_thunk_name (name, regno);
4892 if (USE_HIDDEN_LINKONCE)
4894 tree decl;
4896 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4897 error_mark_node);
4898 TREE_PUBLIC (decl) = 1;
4899 TREE_STATIC (decl) = 1;
4900 DECL_ONE_ONLY (decl) = 1;
4902 (*targetm.asm_out.unique_section) (decl, 0);
4903 named_section (decl, NULL, 0);
4905 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4906 fputs ("\t.hidden\t", asm_out_file);
4907 assemble_name (asm_out_file, name);
4908 fputc ('\n', asm_out_file);
4909 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4911 else
4913 text_section ();
4914 ASM_OUTPUT_LABEL (asm_out_file, name);
4917 xops[0] = gen_rtx_REG (SImode, regno);
4918 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4919 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4920 output_asm_insn ("ret", xops);
4923 if (NEED_INDICATE_EXEC_STACK)
4924 file_end_indicate_exec_stack ();
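/* Roughly, for each register whose bit is set in pic_labels_used the loop
   above emits a thunk of the following shape (shown for %ebx; the exact
   section and visibility directives depend on USE_HIDDEN_LINKONCE and the
   assembler in use):

     __i686.get_pc_thunk.bx:
       movl (%esp), %ebx
       ret
*/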
4927 /* Emit code for the SET_GOT patterns. */
4929 const char *
4930 output_set_got (rtx dest)
4932 rtx xops[3];
4934 xops[0] = dest;
4935 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4937 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4939 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4941 if (!flag_pic)
4942 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4943 else
4944 output_asm_insn ("call\t%a2", xops);
4946 #if TARGET_MACHO
4947 /* Output the "canonical" label name ("Lxx$pb") here too. This
4948 is what will be referred to by the Mach-O PIC subsystem. */
4949 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4950 #endif
4951 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4952 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4954 if (flag_pic)
4955 output_asm_insn ("pop{l}\t%0", xops);
4957 else
4959 char name[32];
4960 get_pc_thunk_name (name, REGNO (dest));
4961 pic_labels_used |= 1 << REGNO (dest);
4963 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4964 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4965 output_asm_insn ("call\t%X2", xops);
4968 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4969 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4970 else if (!TARGET_MACHO)
4971 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4973 return "";
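/* The emitted PIC setup therefore looks roughly like either

       call __i686.get_pc_thunk.bx
       addl $_GLOBAL_OFFSET_TABLE_, %ebx

   (deep branch prediction / thunk variant) or

       call 1f
     1: popl %ebx
       addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   (inline variant); %ebx stands for whatever DEST happens to be, and the
   label syntax is only schematic.  */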
4976 /* Generate a "push" pattern for input ARG. */
4978 static rtx
4979 gen_push (rtx arg)
4981 return gen_rtx_SET (VOIDmode,
4982 gen_rtx_MEM (Pmode,
4983 gen_rtx_PRE_DEC (Pmode,
4984 stack_pointer_rtx)),
4985 arg);
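/* The RTL built here is (set (mem (pre_dec sp)) arg), which the move
   patterns emit as a single "push" instruction.  */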
4988 /* Return >= 0 if there is an unused call-clobbered register available
4989 for the entire function. */
4991 static unsigned int
4992 ix86_select_alt_pic_regnum (void)
4994 if (current_function_is_leaf && !current_function_profile)
4996 int i;
4997 for (i = 2; i >= 0; --i)
4998 if (!regs_ever_live[i])
4999 return i;
5002 return INVALID_REGNUM;
5005 /* Return 1 if we need to save REGNO. */
5006 static int
5007 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5009 if (pic_offset_table_rtx
5010 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5011 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5012 || current_function_profile
5013 || current_function_calls_eh_return
5014 || current_function_uses_const_pool))
5016 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5017 return 0;
5018 return 1;
5021 if (current_function_calls_eh_return && maybe_eh_return)
5023 unsigned i;
5024 for (i = 0; ; i++)
5026 unsigned test = EH_RETURN_DATA_REGNO (i);
5027 if (test == INVALID_REGNUM)
5028 break;
5029 if (test == regno)
5030 return 1;
5034 return (regs_ever_live[regno]
5035 && !call_used_regs[regno]
5036 && !fixed_regs[regno]
5037 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5040 /* Return number of registers to be saved on the stack. */
5042 static int
5043 ix86_nsaved_regs (void)
5045 int nregs = 0;
5046 int regno;
5048 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5049 if (ix86_save_reg (regno, true))
5050 nregs++;
5051 return nregs;
5054 /* Return the offset between two registers, one to be eliminated, and the other
5055 its replacement, at the start of a routine. */
5057 HOST_WIDE_INT
5058 ix86_initial_elimination_offset (int from, int to)
5060 struct ix86_frame frame;
5061 ix86_compute_frame_layout (&frame);
5063 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5064 return frame.hard_frame_pointer_offset;
5065 else if (from == FRAME_POINTER_REGNUM
5066 && to == HARD_FRAME_POINTER_REGNUM)
5067 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5068 else
5070 if (to != STACK_POINTER_REGNUM)
5071 abort ();
5072 else if (from == ARG_POINTER_REGNUM)
5073 return frame.stack_pointer_offset;
5074 else if (from != FRAME_POINTER_REGNUM)
5075 abort ();
5076 else
5077 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5081 /* Fill in the ix86_frame structure *FRAME describing the frame of the function being compiled. */
5083 static void
5084 ix86_compute_frame_layout (struct ix86_frame *frame)
5086 HOST_WIDE_INT total_size;
5087 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5088 HOST_WIDE_INT offset;
5089 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5090 HOST_WIDE_INT size = get_frame_size ();
5092 frame->nregs = ix86_nsaved_regs ();
5093 total_size = size;
5095 /* During reload iterations the number of registers saved can change.
5096 Recompute the value as needed. Do not recompute when the number of registers
5097 didn't change, as reload makes multiple calls to this function and does not
5098 expect the decision to change within a single iteration. */
5099 if (!optimize_size
5100 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5102 int count = frame->nregs;
5104 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5105 /* The fast prologue uses moves instead of pushes to save registers. This
5106 is significantly longer, but it also executes faster, as modern hardware
5107 can execute the moves in parallel but cannot do so for push/pop.
5109 Be careful about choosing which prologue to emit: when the function takes
5110 many instructions to execute, we may as well use the slow version; the same
5111 holds when the function is known to be outside a hot spot (this is known
5112 with feedback only). Weight the size of the function by the number of
5113 registers to save, as it is cheap to use one or two push instructions but
5114 very slow to use many of them. */
5115 if (count)
5116 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5117 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5118 || (flag_branch_probabilities
5119 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5120 cfun->machine->use_fast_prologue_epilogue = false;
5121 else
5122 cfun->machine->use_fast_prologue_epilogue
5123 = !expensive_function_p (count);
5125 if (TARGET_PROLOGUE_USING_MOVE
5126 && cfun->machine->use_fast_prologue_epilogue)
5127 frame->save_regs_using_mov = true;
5128 else
5129 frame->save_regs_using_mov = false;
5132 /* Skip return address and saved base pointer. */
5133 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5135 frame->hard_frame_pointer_offset = offset;
5137 /* Do some sanity checking of stack_alignment_needed and
5138 preferred_alignment, since the i386 port is the only one using these
5139 features, and they may break easily. */
5141 if (size && !stack_alignment_needed)
5142 abort ();
5143 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5144 abort ();
5145 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5146 abort ();
5147 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5148 abort ();
5150 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5151 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5153 /* Register save area */
5154 offset += frame->nregs * UNITS_PER_WORD;
5156 /* Va-arg area */
5157 if (ix86_save_varrargs_registers)
5159 offset += X86_64_VARARGS_SIZE;
5160 frame->va_arg_size = X86_64_VARARGS_SIZE;
5162 else
5163 frame->va_arg_size = 0;
5165 /* Align start of frame for local function. */
5166 frame->padding1 = ((offset + stack_alignment_needed - 1)
5167 & -stack_alignment_needed) - offset;
5169 offset += frame->padding1;
5171 /* Frame pointer points here. */
5172 frame->frame_pointer_offset = offset;
5174 offset += size;
5176 /* Add the outgoing arguments area. It can be skipped if we eliminated
5177 all the function calls as dead code.
5178 Skipping is, however, impossible when the function calls alloca: the
5179 alloca expander assumes that the last current_function_outgoing_args_size
5180 bytes of the stack frame are unused. */
5181 if (ACCUMULATE_OUTGOING_ARGS
5182 && (!current_function_is_leaf || current_function_calls_alloca))
5184 offset += current_function_outgoing_args_size;
5185 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5187 else
5188 frame->outgoing_arguments_size = 0;
5190 /* Align stack boundary. Only needed if we're calling another function
5191 or using alloca. */
5192 if (!current_function_is_leaf || current_function_calls_alloca)
5193 frame->padding2 = ((offset + preferred_alignment - 1)
5194 & -preferred_alignment) - offset;
5195 else
5196 frame->padding2 = 0;
5198 offset += frame->padding2;
5200 /* We've reached end of stack frame. */
5201 frame->stack_pointer_offset = offset;
5203 /* Size prologue needs to allocate. */
5204 frame->to_allocate =
5205 (size + frame->padding1 + frame->padding2
5206 + frame->outgoing_arguments_size + frame->va_arg_size);
5208 if ((!frame->to_allocate && frame->nregs <= 1)
5209 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5210 frame->save_regs_using_mov = false;
5212 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5213 && current_function_is_leaf)
5215 frame->red_zone_size = frame->to_allocate;
5216 if (frame->save_regs_using_mov)
5217 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5218 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5219 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5221 else
5222 frame->red_zone_size = 0;
5223 frame->to_allocate -= frame->red_zone_size;
5224 frame->stack_pointer_offset -= frame->red_zone_size;
5225 #if 0
5226 fprintf (stderr, "nregs: %i\n", frame->nregs);
5227 fprintf (stderr, "size: %i\n", size);
5228 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5229 fprintf (stderr, "padding1: %i\n", frame->padding1);
5230 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5231 fprintf (stderr, "padding2: %i\n", frame->padding2);
5232 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5233 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5234 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5235 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5236 frame->hard_frame_pointer_offset);
5237 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5238 #endif
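/* Schematic layout of the frame computed above, from high to low addresses
   (a sketch for orientation only, not an additional data structure):

       return address
       saved %ebp (if frame_pointer_needed)   <- hard_frame_pointer_offset
       saved registers (nregs * UNITS_PER_WORD)
       va_arg register save area (if used)
       padding1                               <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments
       padding2                               <- stack_pointer_offset

   with red_zone_size subtracted from the allocation where the red zone
   applies.  */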
5241 /* Emit code to save registers in the prologue. */
5243 static void
5244 ix86_emit_save_regs (void)
5246 int regno;
5247 rtx insn;
5249 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5250 if (ix86_save_reg (regno, true))
5252 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5253 RTX_FRAME_RELATED_P (insn) = 1;
5257 /* Emit code to save registers using MOV insns. The first register
5258 is saved at POINTER + OFFSET. */
5259 static void
5260 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5262 int regno;
5263 rtx insn;
5265 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5266 if (ix86_save_reg (regno, true))
5268 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5269 Pmode, offset),
5270 gen_rtx_REG (Pmode, regno));
5271 RTX_FRAME_RELATED_P (insn) = 1;
5272 offset += UNITS_PER_WORD;
5276 /* Expand a prologue or epilogue stack adjustment.
5277 The pattern exists to put a dependency on all ebp-based memory accesses.
5278 STYLE should be negative if the instructions should be marked as frame
5279 related, zero if the %r11 register is live and cannot be freely used,
5280 and positive otherwise. */
5282 static void
5283 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5285 rtx insn;
5287 if (! TARGET_64BIT)
5288 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5289 else if (x86_64_immediate_operand (offset, DImode))
5290 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5291 else
5293 rtx r11;
5294 /* r11 is used by indirect sibcall return as well, set before the
5295 epilogue and used after the epilogue. ATM indirect sibcall
5296 shouldn't be used together with huge frame sizes in one
5297 function because of the frame_size check in sibcall.c. */
5298 if (style == 0)
5299 abort ();
5300 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5301 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5302 if (style < 0)
5303 RTX_FRAME_RELATED_P (insn) = 1;
5304 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5305 offset));
5307 if (style < 0)
5308 RTX_FRAME_RELATED_P (insn) = 1;
5311 /* Expand the prologue into a bunch of separate insns. */
5313 void
5314 ix86_expand_prologue (void)
5316 rtx insn;
5317 bool pic_reg_used;
5318 struct ix86_frame frame;
5319 HOST_WIDE_INT allocate;
5321 ix86_compute_frame_layout (&frame);
5323 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5324 slower on all targets. Also sdb doesn't like it. */
5326 if (frame_pointer_needed)
5328 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5329 RTX_FRAME_RELATED_P (insn) = 1;
5331 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5332 RTX_FRAME_RELATED_P (insn) = 1;
5335 allocate = frame.to_allocate;
5337 if (!frame.save_regs_using_mov)
5338 ix86_emit_save_regs ();
5339 else
5340 allocate += frame.nregs * UNITS_PER_WORD;
5342 /* When using the red zone we may start saving registers before allocating
5343 the stack frame, saving one cycle of the prologue. */
5344 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5345 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5346 : stack_pointer_rtx,
5347 -frame.nregs * UNITS_PER_WORD);
5349 if (allocate == 0)
5351 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5352 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5353 GEN_INT (-allocate), -1);
5354 else
5356 /* Only valid for Win32. */
5357 rtx eax = gen_rtx_REG (SImode, 0);
5358 bool eax_live = ix86_eax_live_at_start_p ();
5360 if (TARGET_64BIT)
5361 abort ();
5363 if (eax_live)
5365 emit_insn (gen_push (eax));
5366 allocate -= 4;
5369 insn = emit_move_insn (eax, GEN_INT (allocate));
5370 RTX_FRAME_RELATED_P (insn) = 1;
5372 insn = emit_insn (gen_allocate_stack_worker (eax));
5373 RTX_FRAME_RELATED_P (insn) = 1;
5375 if (eax_live)
5377 rtx t = plus_constant (stack_pointer_rtx, allocate);
5378 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5382 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5384 if (!frame_pointer_needed || !frame.to_allocate)
5385 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5386 else
5387 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5388 -frame.nregs * UNITS_PER_WORD);
5391 pic_reg_used = false;
5392 if (pic_offset_table_rtx
5393 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5394 || current_function_profile))
5396 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5398 if (alt_pic_reg_used != INVALID_REGNUM)
5399 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5401 pic_reg_used = true;
5404 if (pic_reg_used)
5406 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5408 /* Even with accurate pre-reload life analysis, we can wind up
5409 deleting all references to the pic register after reload.
5410 Consider, for example, cross-jumping unifying two sides of a branch
5411 controlled by a comparison vs the only read from a global.
5412 In that case, allow the set_got to be deleted, though we're
5413 too late to do anything about the ebx save in the prologue. */
5414 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5417 /* Prevent function calls from being scheduled before the call to mcount.
5418 In the pic_reg_used case, make sure that the got load isn't deleted. */
5419 if (current_function_profile)
5420 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
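/* As an illustration, for a 32-bit function that needs a frame pointer,
   saves one register with pushes and allocates N bytes of locals, the
   expansion above corresponds to something like

       pushl %ebp
       movl  %esp, %ebp
       pushl %ebx
       subl  $N, %esp

   with the save_regs_using_mov variant replacing the push by a mov into
   the already allocated frame; N and %ebx are placeholders.  */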
5423 /* Emit code to restore saved registers using MOV insns. First register
5424 is restored from POINTER + OFFSET. */
5425 static void
5426 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5427 int maybe_eh_return)
5429 int regno;
5430 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5432 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5433 if (ix86_save_reg (regno, maybe_eh_return))
5435 /* Ensure that adjust_address won't be forced to produce a pointer
5436 outside the range allowed by the x86-64 instruction set. */
5437 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5439 rtx r11;
5441 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5442 emit_move_insn (r11, GEN_INT (offset));
5443 emit_insn (gen_adddi3 (r11, r11, pointer));
5444 base_address = gen_rtx_MEM (Pmode, r11);
5445 offset = 0;
5447 emit_move_insn (gen_rtx_REG (Pmode, regno),
5448 adjust_address (base_address, Pmode, offset));
5449 offset += UNITS_PER_WORD;
5453 /* Restore function stack, frame, and registers. */
5455 void
5456 ix86_expand_epilogue (int style)
5458 int regno;
5459 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5460 struct ix86_frame frame;
5461 HOST_WIDE_INT offset;
5463 ix86_compute_frame_layout (&frame);
5465 /* Calculate start of saved registers relative to ebp. Special care
5466 must be taken for the normal return case of a function using
5467 eh_return: the eax and edx registers are marked as saved, but not
5468 restored along this path. */
5469 offset = frame.nregs;
5470 if (current_function_calls_eh_return && style != 2)
5471 offset -= 2;
5472 offset *= -UNITS_PER_WORD;
5474 /* If we're only restoring one register and sp is not valid, then
5475 use a move instruction to restore the register, since it's
5476 less work than reloading sp and popping the register.
5478 The default code results in a stack adjustment using an add/lea instruction,
5479 while this code results in a LEAVE instruction (or discrete equivalent),
5480 so it is profitable in some other cases as well, especially when there
5481 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5482 is set and there is exactly one register to pop. This heuristic may need
5483 some tuning in the future. */
5484 if ((!sp_valid && frame.nregs <= 1)
5485 || (TARGET_EPILOGUE_USING_MOVE
5486 && cfun->machine->use_fast_prologue_epilogue
5487 && (frame.nregs > 1 || frame.to_allocate))
5488 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5489 || (frame_pointer_needed && TARGET_USE_LEAVE
5490 && cfun->machine->use_fast_prologue_epilogue
5491 && frame.nregs == 1)
5492 || current_function_calls_eh_return)
5494 /* Restore registers. We can use ebp or esp to address the memory
5495 locations. If both are available, default to ebp, since offsets
5496 are known to be small. The only exception is esp pointing directly
5497 to the end of the block of saved registers, where we may simplify
5498 the addressing mode. */
5500 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5501 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5502 frame.to_allocate, style == 2);
5503 else
5504 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5505 offset, style == 2);
5507 /* eh_return epilogues need %ecx added to the stack pointer. */
5508 if (style == 2)
5510 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5512 if (frame_pointer_needed)
5514 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5515 tmp = plus_constant (tmp, UNITS_PER_WORD);
5516 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5518 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5519 emit_move_insn (hard_frame_pointer_rtx, tmp);
5521 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5522 const0_rtx, style);
5524 else
5526 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5527 tmp = plus_constant (tmp, (frame.to_allocate
5528 + frame.nregs * UNITS_PER_WORD));
5529 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5532 else if (!frame_pointer_needed)
5533 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5534 GEN_INT (frame.to_allocate
5535 + frame.nregs * UNITS_PER_WORD),
5536 style);
5537 /* If not an i386, mov & pop is faster than "leave". */
5538 else if (TARGET_USE_LEAVE || optimize_size
5539 || !cfun->machine->use_fast_prologue_epilogue)
5540 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5541 else
5543 pro_epilogue_adjust_stack (stack_pointer_rtx,
5544 hard_frame_pointer_rtx,
5545 const0_rtx, style);
5546 if (TARGET_64BIT)
5547 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5548 else
5549 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5552 else
5554 /* First step is to deallocate the stack frame so that we can
5555 pop the registers. */
5556 if (!sp_valid)
5558 if (!frame_pointer_needed)
5559 abort ();
5560 pro_epilogue_adjust_stack (stack_pointer_rtx,
5561 hard_frame_pointer_rtx,
5562 GEN_INT (offset), style);
5564 else if (frame.to_allocate)
5565 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5566 GEN_INT (frame.to_allocate), style);
5568 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5569 if (ix86_save_reg (regno, false))
5571 if (TARGET_64BIT)
5572 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5573 else
5574 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5576 if (frame_pointer_needed)
5578 /* LEAVE results in shorter dependency chains on CPUs that are
5579 able to grok it fast. */
5580 if (TARGET_USE_LEAVE)
5581 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5582 else if (TARGET_64BIT)
5583 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5584 else
5585 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5589 /* Sibcall epilogues don't want a return instruction. */
5590 if (style == 0)
5591 return;
5593 if (current_function_pops_args && current_function_args_size)
5595 rtx popc = GEN_INT (current_function_pops_args);
5597 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5598 return address, do an explicit add, and jump indirectly to the
5599 caller. */
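/* Roughly, for a 32-bit "pascal"-style function asked to pop more than
   64K bytes of arguments, the sequence emitted below is:
       popl  %ecx          ; fetch the return address
       addl  $N, %esp      ; pop the (large) argument block
       jmp   *%ecx         ; return to the caller
   where N stands for current_function_pops_args.  */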
5601 if (current_function_pops_args >= 65536)
5603 rtx ecx = gen_rtx_REG (SImode, 2);
5605 /* There is no "pascal" calling convention in 64bit ABI. */
5606 if (TARGET_64BIT)
5607 abort ();
5609 emit_insn (gen_popsi1 (ecx));
5610 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5611 emit_jump_insn (gen_return_indirect_internal (ecx));
5613 else
5614 emit_jump_insn (gen_return_pop_internal (popc));
5616 else
5617 emit_jump_insn (gen_return_internal ());
5620 /* Undo the function's potential modifications to the PIC register number. */
5622 static void
5623 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5624 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5626 if (pic_offset_table_rtx)
5627 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5630 /* Extract the parts of an RTL expression that is a valid memory address
5631 for an instruction. Return 0 if the structure of the address is
5632 grossly off. Return -1 if the address contains ASHIFT, so it is not
5633 strictly valid, but is still used for computing the length of an lea instruction. */
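/* For illustration (register names A and B are placeholders): the address
       (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   -- 12(%A,%B,4) in AT&T syntax -- decomposes into base = A, index = B,
   scale = 4, disp = (const_int 12) and seg = SEG_DEFAULT.  */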
5635 static int
5636 ix86_decompose_address (rtx addr, struct ix86_address *out)
5638 rtx base = NULL_RTX;
5639 rtx index = NULL_RTX;
5640 rtx disp = NULL_RTX;
5641 HOST_WIDE_INT scale = 1;
5642 rtx scale_rtx = NULL_RTX;
5643 int retval = 1;
5644 enum ix86_address_seg seg = SEG_DEFAULT;
5646 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5647 base = addr;
5648 else if (GET_CODE (addr) == PLUS)
5650 rtx addends[4], op;
5651 int n = 0, i;
5653 op = addr;
5656 if (n >= 4)
5657 return 0;
5658 addends[n++] = XEXP (op, 1);
5659 op = XEXP (op, 0);
5661 while (GET_CODE (op) == PLUS);
5662 if (n >= 4)
5663 return 0;
5664 addends[n] = op;
5666 for (i = n; i >= 0; --i)
5668 op = addends[i];
5669 switch (GET_CODE (op))
5671 case MULT:
5672 if (index)
5673 return 0;
5674 index = XEXP (op, 0);
5675 scale_rtx = XEXP (op, 1);
5676 break;
5678 case UNSPEC:
5679 if (XINT (op, 1) == UNSPEC_TP
5680 && TARGET_TLS_DIRECT_SEG_REFS
5681 && seg == SEG_DEFAULT)
5682 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5683 else
5684 return 0;
5685 break;
5687 case REG:
5688 case SUBREG:
5689 if (!base)
5690 base = op;
5691 else if (!index)
5692 index = op;
5693 else
5694 return 0;
5695 break;
5697 case CONST:
5698 case CONST_INT:
5699 case SYMBOL_REF:
5700 case LABEL_REF:
5701 if (disp)
5702 return 0;
5703 disp = op;
5704 break;
5706 default:
5707 return 0;
5711 else if (GET_CODE (addr) == MULT)
5713 index = XEXP (addr, 0); /* index*scale */
5714 scale_rtx = XEXP (addr, 1);
5716 else if (GET_CODE (addr) == ASHIFT)
5718 rtx tmp;
5720 /* We're called for lea too, which implements ashift on occasion. */
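/* For example, (ashift (reg) (const_int 3)) becomes index = (reg) with
   scale = 8, and retval is set to -1 below: such an address is not
   strictly valid, but its length can still be computed for lea.  */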
5721 index = XEXP (addr, 0);
5722 tmp = XEXP (addr, 1);
5723 if (GET_CODE (tmp) != CONST_INT)
5724 return 0;
5725 scale = INTVAL (tmp);
5726 if ((unsigned HOST_WIDE_INT) scale > 3)
5727 return 0;
5728 scale = 1 << scale;
5729 retval = -1;
5731 else
5732 disp = addr; /* displacement */
5734 /* Extract the integral value of scale. */
5735 if (scale_rtx)
5737 if (GET_CODE (scale_rtx) != CONST_INT)
5738 return 0;
5739 scale = INTVAL (scale_rtx);
5742 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5743 if (base && index && scale == 1
5744 && (index == arg_pointer_rtx
5745 || index == frame_pointer_rtx
5746 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5748 rtx tmp = base;
5749 base = index;
5750 index = tmp;
5753 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5754 if ((base == hard_frame_pointer_rtx
5755 || base == frame_pointer_rtx
5756 || base == arg_pointer_rtx) && !disp)
5757 disp = const0_rtx;
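/* (In the ModR/M encoding, mod == 00 with a base of %ebp actually means
   "disp32 with no base", so a plain [%ebp] must be emitted as [%ebp+0];
   forcing a zero displacement here achieves that.)  */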
5759 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5760 Avoid this by transforming to [%esi+0]. */
5761 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5762 && base && !index && !disp
5763 && REG_P (base)
5764 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5765 disp = const0_rtx;
5767 /* Special case: encode reg+reg instead of reg*2. */
5768 if (!base && index && scale && scale == 2)
5769 base = index, scale = 1;
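/* ([reg+reg] needs no displacement, whereas a scaled index with no base
   must be encoded with a 32-bit displacement, so reg+reg is shorter.)  */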
5771 /* Special case: scaling cannot be encoded without base or displacement. */
5772 if (!base && !disp && index && scale != 1)
5773 disp = const0_rtx;
5775 out->base = base;
5776 out->index = index;
5777 out->disp = disp;
5778 out->scale = scale;
5779 out->seg = seg;
5781 return retval;
5784 /* Return the cost of the memory address x.
5785 For i386, it is better to use a complex address than let gcc copy
5786 the address into a reg and make a new pseudo. But not if the address
5787 requires two regs - that would mean more pseudos with longer
5788 lifetimes. */
5789 static int
5790 ix86_address_cost (rtx x)
5792 struct ix86_address parts;
5793 int cost = 1;
5795 if (!ix86_decompose_address (x, &parts))
5796 abort ();
5798 /* More complex memory references are better. */
5799 if (parts.disp && parts.disp != const0_rtx)
5800 cost--;
5801 if (parts.seg != SEG_DEFAULT)
5802 cost--;
5804 /* Attempt to minimize number of registers in the address. */
5805 if ((parts.base
5806 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5807 || (parts.index
5808 && (!REG_P (parts.index)
5809 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5810 cost++;
5812 if (parts.base
5813 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5814 && parts.index
5815 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5816 && parts.base != parts.index)
5817 cost++;
5819 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5820 since its predecode logic can't detect the length of such instructions
5821 and decoding degenerates to the vector decoder. Increase the cost of such
5822 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5823 to split such addresses or even refuse them entirely.
5825 The following addressing modes are affected:
5826 [base+scale*index]
5827 [scale*index+disp]
5828 [base+index]
5830 The first and last cases may be avoidable by explicitly coding the zero into
5831 the memory address, but I don't have an AMD-K6 machine handy to check this
5832 theory. */
5834 if (TARGET_K6
5835 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5836 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5837 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5838 cost += 10;
5840 return cost;
5843 /* If X is a machine specific address (i.e. a symbol or label being
5844 referenced as a displacement from the GOT implemented using an
5845 UNSPEC), then return the base term. Otherwise return X. */
5848 ix86_find_base_term (rtx x)
5850 rtx term;
5852 if (TARGET_64BIT)
5854 if (GET_CODE (x) != CONST)
5855 return x;
5856 term = XEXP (x, 0);
5857 if (GET_CODE (term) == PLUS
5858 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5859 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5860 term = XEXP (term, 0);
5861 if (GET_CODE (term) != UNSPEC
5862 || XINT (term, 1) != UNSPEC_GOTPCREL)
5863 return x;
5865 term = XVECEXP (term, 0, 0);
5867 if (GET_CODE (term) != SYMBOL_REF
5868 && GET_CODE (term) != LABEL_REF)
5869 return x;
5871 return term;
5874 term = ix86_delegitimize_address (x);
5876 if (GET_CODE (term) != SYMBOL_REF
5877 && GET_CODE (term) != LABEL_REF)
5878 return x;
5880 return term;
5883 /* Determine if a given RTX is a valid constant. We already know this
5884 satisfies CONSTANT_P. */
5886 bool
5887 legitimate_constant_p (rtx x)
5889 rtx inner;
5891 switch (GET_CODE (x))
5893 case SYMBOL_REF:
5894 /* TLS symbols are not constant. */
5895 if (tls_symbolic_operand (x, Pmode))
5896 return false;
5897 break;
5899 case CONST:
5900 inner = XEXP (x, 0);
5902 /* Offsets of TLS symbols are never valid.
5903 Discourage CSE from creating them. */
5904 if (GET_CODE (inner) == PLUS
5905 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5906 return false;
5908 if (GET_CODE (inner) == PLUS
5909 || GET_CODE (inner) == MINUS)
5911 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5912 return false;
5913 inner = XEXP (inner, 0);
5916 /* Only some unspecs are valid as "constants". */
5917 if (GET_CODE (inner) == UNSPEC)
5918 switch (XINT (inner, 1))
5920 case UNSPEC_TPOFF:
5921 case UNSPEC_NTPOFF:
5922 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5923 case UNSPEC_DTPOFF:
5924 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5925 default:
5926 return false;
5928 break;
5930 default:
5931 break;
5934 /* Otherwise we handle everything else in the move patterns. */
5935 return true;
5938 /* Determine if it's legal to put X into the constant pool. This
5939 is not possible for the address of thread-local symbols, which
5940 is checked above. */
5942 static bool
5943 ix86_cannot_force_const_mem (rtx x)
5945 return !legitimate_constant_p (x);
5948 /* Determine if a given RTX is a valid constant address. */
5950 bool
5951 constant_address_p (rtx x)
5953 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5956 /* Nonzero if the constant value X is a legitimate general operand
5957 when generating PIC code. It is given that flag_pic is on and
5958 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5960 bool
5961 legitimate_pic_operand_p (rtx x)
5963 rtx inner;
5965 switch (GET_CODE (x))
5967 case CONST:
5968 inner = XEXP (x, 0);
5970 /* Only some unspecs are valid as "constants". */
5971 if (GET_CODE (inner) == UNSPEC)
5972 switch (XINT (inner, 1))
5974 case UNSPEC_TPOFF:
5975 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5976 default:
5977 return false;
5979 /* FALLTHRU */
5981 case SYMBOL_REF:
5982 case LABEL_REF:
5983 return legitimate_pic_address_disp_p (x);
5985 default:
5986 return true;
5990 /* Determine if a given CONST RTX is a valid memory displacement
5991 in PIC mode. */
5994 legitimate_pic_address_disp_p (rtx disp)
5996 bool saw_plus;
5998 /* In 64bit mode we can allow direct addresses of symbols and labels
5999 when they are not dynamic symbols. */
6000 if (TARGET_64BIT)
6002 /* TLS references should always be enclosed in UNSPEC. */
6003 if (tls_symbolic_operand (disp, GET_MODE (disp)))
6004 return 0;
6005 if (GET_CODE (disp) == SYMBOL_REF
6006 && ix86_cmodel == CM_SMALL_PIC
6007 && SYMBOL_REF_LOCAL_P (disp))
6008 return 1;
6009 if (GET_CODE (disp) == LABEL_REF)
6010 return 1;
6011 if (GET_CODE (disp) == CONST
6012 && GET_CODE (XEXP (disp, 0)) == PLUS)
6014 rtx op0 = XEXP (XEXP (disp, 0), 0);
6015 rtx op1 = XEXP (XEXP (disp, 0), 1);
6017 /* TLS references should always be enclosed in UNSPEC. */
6018 if (tls_symbolic_operand (op0, GET_MODE (op0)))
6019 return 0;
6020 if (((GET_CODE (op0) == SYMBOL_REF
6021 && ix86_cmodel == CM_SMALL_PIC
6022 && SYMBOL_REF_LOCAL_P (op0))
6023 || GET_CODE (op0) == LABEL_REF)
6024 && GET_CODE (op1) == CONST_INT
6025 && INTVAL (op1) < 16*1024*1024
6026 && INTVAL (op1) >= -16*1024*1024)
6027 return 1;
6030 if (GET_CODE (disp) != CONST)
6031 return 0;
6032 disp = XEXP (disp, 0);
6034 if (TARGET_64BIT)
6036 /* It is unsafe to allow PLUS expressions here; this limits the allowed
6037 distance of GOT table references. We should not need them anyway. */
6038 if (GET_CODE (disp) != UNSPEC
6039 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6040 return 0;
6042 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6043 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6044 return 0;
6045 return 1;
6048 saw_plus = false;
6049 if (GET_CODE (disp) == PLUS)
6051 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6052 return 0;
6053 disp = XEXP (disp, 0);
6054 saw_plus = true;
6057 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6058 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6060 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6061 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6062 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6064 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6065 if (! strcmp (sym_name, "<pic base>"))
6066 return 1;
6070 if (GET_CODE (disp) != UNSPEC)
6071 return 0;
6073 switch (XINT (disp, 1))
6075 case UNSPEC_GOT:
6076 if (saw_plus)
6077 return false;
6078 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6079 case UNSPEC_GOTOFF:
6080 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6081 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6082 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6083 return false;
6084 case UNSPEC_GOTTPOFF:
6085 case UNSPEC_GOTNTPOFF:
6086 case UNSPEC_INDNTPOFF:
6087 if (saw_plus)
6088 return false;
6089 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6090 case UNSPEC_NTPOFF:
6091 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6092 case UNSPEC_DTPOFF:
6093 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6096 return 0;
6099 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6100 memory address for an instruction. The MODE argument is the machine mode
6101 for the MEM expression that wants to use this address.
6103 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6104 convert common non-canonical forms to canonical form so that they will
6105 be recognized. */
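/* Roughly, STRICT distinguishes the two phases of checking: with STRICT
   nonzero only hard registers that are valid base/index registers are
   accepted, while with STRICT zero pseudo registers (which may still be
   allocated to suitable hard registers later) are accepted as well.  */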
6108 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6110 struct ix86_address parts;
6111 rtx base, index, disp;
6112 HOST_WIDE_INT scale;
6113 const char *reason = NULL;
6114 rtx reason_rtx = NULL_RTX;
6116 if (TARGET_DEBUG_ADDR)
6118 fprintf (stderr,
6119 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6120 GET_MODE_NAME (mode), strict);
6121 debug_rtx (addr);
6124 if (ix86_decompose_address (addr, &parts) <= 0)
6126 reason = "decomposition failed";
6127 goto report_error;
6130 base = parts.base;
6131 index = parts.index;
6132 disp = parts.disp;
6133 scale = parts.scale;
6135 /* Validate base register.
6137 Don't allow SUBREGs here; they can lead to spill failures when the base
6138 is one word out of a two-word structure, which is represented internally
6139 as a DImode int. */
6141 if (base)
6143 reason_rtx = base;
6145 if (GET_CODE (base) != REG)
6147 reason = "base is not a register";
6148 goto report_error;
6151 if (GET_MODE (base) != Pmode)
6153 reason = "base is not in Pmode";
6154 goto report_error;
6157 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6158 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6160 reason = "base is not valid";
6161 goto report_error;
6165 /* Validate index register.
6167 Don't allow SUBREGs here; they can lead to spill failures when the index
6168 is one word out of a two-word structure, which is represented internally
6169 as a DImode int. */
6171 if (index)
6173 reason_rtx = index;
6175 if (GET_CODE (index) != REG)
6177 reason = "index is not a register";
6178 goto report_error;
6181 if (GET_MODE (index) != Pmode)
6183 reason = "index is not in Pmode";
6184 goto report_error;
6187 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6188 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6190 reason = "index is not valid";
6191 goto report_error;
6195 /* Validate scale factor. */
6196 if (scale != 1)
6198 reason_rtx = GEN_INT (scale);
6199 if (!index)
6201 reason = "scale without index";
6202 goto report_error;
6205 if (scale != 2 && scale != 4 && scale != 8)
6207 reason = "scale is not a valid multiplier";
6208 goto report_error;
6212 /* Validate displacement. */
6213 if (disp)
6215 reason_rtx = disp;
6217 if (GET_CODE (disp) == CONST
6218 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6219 switch (XINT (XEXP (disp, 0), 1))
6221 case UNSPEC_GOT:
6222 case UNSPEC_GOTOFF:
6223 case UNSPEC_GOTPCREL:
6224 if (!flag_pic)
6225 abort ();
6226 goto is_legitimate_pic;
6228 case UNSPEC_GOTTPOFF:
6229 case UNSPEC_GOTNTPOFF:
6230 case UNSPEC_INDNTPOFF:
6231 case UNSPEC_NTPOFF:
6232 case UNSPEC_DTPOFF:
6233 break;
6235 default:
6236 reason = "invalid address unspec";
6237 goto report_error;
6240 else if (flag_pic && (SYMBOLIC_CONST (disp)
6241 #if TARGET_MACHO
6242 && !machopic_operand_p (disp)
6243 #endif
6246 is_legitimate_pic:
6247 if (TARGET_64BIT && (index || base))
6249 /* foo@dtpoff(%rX) is ok. */
6250 if (GET_CODE (disp) != CONST
6251 || GET_CODE (XEXP (disp, 0)) != PLUS
6252 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6253 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6254 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6255 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6257 reason = "non-constant pic memory reference";
6258 goto report_error;
6261 else if (! legitimate_pic_address_disp_p (disp))
6263 reason = "displacement is an invalid pic construct";
6264 goto report_error;
6267 /* This code used to verify that a symbolic pic displacement
6268 includes the pic_offset_table_rtx register.
6270 While this is a good idea, unfortunately these constructs may
6271 be created by the "adds using lea" optimization for incorrect
6272 code like:
6274 int a;
6275 int foo(int i)
6277 return *(&a+i);
6280 This code is nonsensical, but it results in addressing the
6281 GOT table with pic_offset_table_rtx as the base. We can't
6282 easily refuse it, since it gets matched by the
6283 "addsi3" pattern, which later gets split to lea when the
6284 output register differs from the input. While this
6285 could be handled by a separate addsi pattern for this case
6286 that never results in lea, disabling this test seems to be
6287 the easier and correct fix for the crash. */
6289 else if (GET_CODE (disp) != LABEL_REF
6290 && GET_CODE (disp) != CONST_INT
6291 && (GET_CODE (disp) != CONST
6292 || !legitimate_constant_p (disp))
6293 && (GET_CODE (disp) != SYMBOL_REF
6294 || !legitimate_constant_p (disp)))
6296 reason = "displacement is not constant";
6297 goto report_error;
6299 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6301 reason = "displacement is out of range";
6302 goto report_error;
6306 /* Everything looks valid. */
6307 if (TARGET_DEBUG_ADDR)
6308 fprintf (stderr, "Success.\n");
6309 return TRUE;
6311 report_error:
6312 if (TARGET_DEBUG_ADDR)
6314 fprintf (stderr, "Error: %s\n", reason);
6315 debug_rtx (reason_rtx);
6317 return FALSE;
6320 /* Return a unique alias set for the GOT. */
6322 static HOST_WIDE_INT
6323 ix86_GOT_alias_set (void)
6325 static HOST_WIDE_INT set = -1;
6326 if (set == -1)
6327 set = new_alias_set ();
6328 return set;
6331 /* Return a legitimate reference for ORIG (an address) using the
6332 register REG. If REG is 0, a new pseudo is generated.
6334 There are two types of references that must be handled:
6336 1. Global data references must load the address from the GOT, via
6337 the PIC reg. An insn is emitted to do this load, and the reg is
6338 returned.
6340 2. Static data references, constant pool addresses, and code labels
6341 compute the address as an offset from the GOT, whose base is in
6342 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6343 differentiate them from global data objects. The returned
6344 address is the PIC reg + an unspec constant.
6346 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6347 reg also appears in the address. */
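/* For illustration of the 32-bit case (roughly): a local symbol ends up
   addressed as %ebx + foo@GOTOFF, e.g. "leal foo@GOTOFF(%ebx), %reg",
   while a global symbol is loaded from its GOT slot,
   e.g. "movl foo@GOT(%ebx), %reg".  */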
6349 static rtx
6350 legitimize_pic_address (rtx orig, rtx reg)
6352 rtx addr = orig;
6353 rtx new = orig;
6354 rtx base;
6356 #if TARGET_MACHO
6357 if (reg == 0)
6358 reg = gen_reg_rtx (Pmode);
6359 /* Use the generic Mach-O PIC machinery. */
6360 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6361 #endif
6363 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6364 new = addr;
6365 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6367 /* This symbol may be referenced via a displacement from the PIC
6368 base address (@GOTOFF). */
6370 if (reload_in_progress)
6371 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6372 if (GET_CODE (addr) == CONST)
6373 addr = XEXP (addr, 0);
6374 if (GET_CODE (addr) == PLUS)
6376 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6377 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6379 else
6380 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6381 new = gen_rtx_CONST (Pmode, new);
6382 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6384 if (reg != 0)
6386 emit_move_insn (reg, new);
6387 new = reg;
6390 else if (GET_CODE (addr) == SYMBOL_REF)
6392 if (TARGET_64BIT)
6394 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6395 new = gen_rtx_CONST (Pmode, new);
6396 new = gen_rtx_MEM (Pmode, new);
6397 RTX_UNCHANGING_P (new) = 1;
6398 set_mem_alias_set (new, ix86_GOT_alias_set ());
6400 if (reg == 0)
6401 reg = gen_reg_rtx (Pmode);
6402 /* Use gen_movsi directly; otherwise the address is loaded
6403 into a register for CSE. We don't want to CSE these addresses;
6404 instead we CSE the addresses loaded from the GOT table, so skip this. */
6405 emit_insn (gen_movsi (reg, new));
6406 new = reg;
6408 else
6410 /* This symbol must be referenced via a load from the
6411 Global Offset Table (@GOT). */
6413 if (reload_in_progress)
6414 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6415 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6416 new = gen_rtx_CONST (Pmode, new);
6417 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6418 new = gen_rtx_MEM (Pmode, new);
6419 RTX_UNCHANGING_P (new) = 1;
6420 set_mem_alias_set (new, ix86_GOT_alias_set ());
6422 if (reg == 0)
6423 reg = gen_reg_rtx (Pmode);
6424 emit_move_insn (reg, new);
6425 new = reg;
6428 else
6430 if (GET_CODE (addr) == CONST)
6432 addr = XEXP (addr, 0);
6434 /* We must match stuff we generated before. Assume the only
6435 unspecs that can get here are ours. Not that we could do
6436 anything with them anyway.... */
6437 if (GET_CODE (addr) == UNSPEC
6438 || (GET_CODE (addr) == PLUS
6439 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6440 return orig;
6441 if (GET_CODE (addr) != PLUS)
6442 abort ();
6444 if (GET_CODE (addr) == PLUS)
6446 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6448 /* Check first to see if this is a constant offset from a @GOTOFF
6449 symbol reference. */
6450 if (local_symbolic_operand (op0, Pmode)
6451 && GET_CODE (op1) == CONST_INT)
6453 if (!TARGET_64BIT)
6455 if (reload_in_progress)
6456 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6457 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6458 UNSPEC_GOTOFF);
6459 new = gen_rtx_PLUS (Pmode, new, op1);
6460 new = gen_rtx_CONST (Pmode, new);
6461 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6463 if (reg != 0)
6465 emit_move_insn (reg, new);
6466 new = reg;
6469 else
6471 if (INTVAL (op1) < -16*1024*1024
6472 || INTVAL (op1) >= 16*1024*1024)
6473 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6476 else
6478 base = legitimize_pic_address (XEXP (addr, 0), reg);
6479 new = legitimize_pic_address (XEXP (addr, 1),
6480 base == reg ? NULL_RTX : reg);
6482 if (GET_CODE (new) == CONST_INT)
6483 new = plus_constant (base, INTVAL (new));
6484 else
6486 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6488 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6489 new = XEXP (new, 1);
6491 new = gen_rtx_PLUS (Pmode, base, new);
6496 return new;
6499 /* Load the thread pointer. If TO_REG is true, force it into a register. */
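/* (On GNU/Linux the thread pointer lives in the %gs segment base for 32-bit
   code and in %fs for 64-bit code, so UNSPEC_TP typically ends up as a load
   from %gs:0 or %fs:0.)  */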
6501 static rtx
6502 get_thread_pointer (int to_reg)
6504 rtx tp, reg, insn;
6506 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6507 if (!to_reg)
6508 return tp;
6510 reg = gen_reg_rtx (Pmode);
6511 insn = gen_rtx_SET (VOIDmode, reg, tp);
6512 insn = emit_insn (insn);
6514 return reg;
6517 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6518 false if we expect this to be used for a memory address and true if
6519 we expect to load the address into a register. */
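/* Roughly, the four TLS models handled below are:
     GLOBAL_DYNAMIC  - call the tls_get_addr helper to compute the address
                       of the variable at run time;
     LOCAL_DYNAMIC   - one call obtains the module's TLS base, and each
                       variable is then base + sym@DTPOFF;
     INITIAL_EXEC    - load the variable's offset from the thread pointer
                       out of the GOT (@GOTTPOFF and friends) and combine
                       it with the thread pointer;
     LOCAL_EXEC      - the offset is a link-time constant (@TPOFF/@NTPOFF)
                       applied directly to the thread pointer.  */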
6521 static rtx
6522 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6524 rtx dest, base, off, pic;
6525 int type;
6527 switch (model)
6529 case TLS_MODEL_GLOBAL_DYNAMIC:
6530 dest = gen_reg_rtx (Pmode);
6531 if (TARGET_64BIT)
6533 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6535 start_sequence ();
6536 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6537 insns = get_insns ();
6538 end_sequence ();
6540 emit_libcall_block (insns, dest, rax, x);
6542 else
6543 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6544 break;
6546 case TLS_MODEL_LOCAL_DYNAMIC:
6547 base = gen_reg_rtx (Pmode);
6548 if (TARGET_64BIT)
6550 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6552 start_sequence ();
6553 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6554 insns = get_insns ();
6555 end_sequence ();
6557 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6558 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6559 emit_libcall_block (insns, base, rax, note);
6561 else
6562 emit_insn (gen_tls_local_dynamic_base_32 (base));
6564 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6565 off = gen_rtx_CONST (Pmode, off);
6567 return gen_rtx_PLUS (Pmode, base, off);
6569 case TLS_MODEL_INITIAL_EXEC:
6570 if (TARGET_64BIT)
6572 pic = NULL;
6573 type = UNSPEC_GOTNTPOFF;
6575 else if (flag_pic)
6577 if (reload_in_progress)
6578 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6579 pic = pic_offset_table_rtx;
6580 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6582 else if (!TARGET_GNU_TLS)
6584 pic = gen_reg_rtx (Pmode);
6585 emit_insn (gen_set_got (pic));
6586 type = UNSPEC_GOTTPOFF;
6588 else
6590 pic = NULL;
6591 type = UNSPEC_INDNTPOFF;
6594 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6595 off = gen_rtx_CONST (Pmode, off);
6596 if (pic)
6597 off = gen_rtx_PLUS (Pmode, pic, off);
6598 off = gen_rtx_MEM (Pmode, off);
6599 RTX_UNCHANGING_P (off) = 1;
6600 set_mem_alias_set (off, ix86_GOT_alias_set ());
6602 if (TARGET_64BIT || TARGET_GNU_TLS)
6604 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6605 off = force_reg (Pmode, off);
6606 return gen_rtx_PLUS (Pmode, base, off);
6608 else
6610 base = get_thread_pointer (true);
6611 dest = gen_reg_rtx (Pmode);
6612 emit_insn (gen_subsi3 (dest, base, off));
6614 break;
6616 case TLS_MODEL_LOCAL_EXEC:
6617 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6618 (TARGET_64BIT || TARGET_GNU_TLS)
6619 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6620 off = gen_rtx_CONST (Pmode, off);
6622 if (TARGET_64BIT || TARGET_GNU_TLS)
6624 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6625 return gen_rtx_PLUS (Pmode, base, off);
6627 else
6629 base = get_thread_pointer (true);
6630 dest = gen_reg_rtx (Pmode);
6631 emit_insn (gen_subsi3 (dest, base, off));
6633 break;
6635 default:
6636 abort ();
6639 return dest;
6642 /* Try machine-dependent ways of modifying an illegitimate address
6643 to be legitimate. If we find one, return the new, valid address.
6644 This macro is used in only one place: `memory_address' in explow.c.
6646 OLDX is the address as it was before break_out_memory_refs was called.
6647 In some cases it is useful to look at this to decide what needs to be done.
6649 MODE and WIN are passed so that this macro can use
6650 GO_IF_LEGITIMATE_ADDRESS.
6652 It is always safe for this macro to do nothing. It exists to recognize
6653 opportunities to optimize the output.
6655 For the 80386, we handle X+REG by loading X into a register R and
6656 using R+REG. R will go in a general reg and indexing will be used.
6657 However, if REG is a broken-out memory address or multiplication,
6658 nothing needs to be done because REG can certainly go in a general reg.
6660 When -fpic is used, special handling is needed for symbolic references.
6661 See comments by legitimize_pic_address in i386.c for details. */
6664 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6666 int changed = 0;
6667 unsigned log;
6669 if (TARGET_DEBUG_ADDR)
6671 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6672 GET_MODE_NAME (mode));
6673 debug_rtx (x);
6676 log = tls_symbolic_operand (x, mode);
6677 if (log)
6678 return legitimize_tls_address (x, log, false);
6679 if (GET_CODE (x) == CONST
6680 && GET_CODE (XEXP (x, 0)) == PLUS
6681 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6683 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6684 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6687 if (flag_pic && SYMBOLIC_CONST (x))
6688 return legitimize_pic_address (x, 0);
6690 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6691 if (GET_CODE (x) == ASHIFT
6692 && GET_CODE (XEXP (x, 1)) == CONST_INT
6693 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6695 changed = 1;
6696 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6697 GEN_INT (1 << log));
6700 if (GET_CODE (x) == PLUS)
6702 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6704 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6705 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6706 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6708 changed = 1;
6709 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6710 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6711 GEN_INT (1 << log));
6714 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6715 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6716 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6718 changed = 1;
6719 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6720 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6721 GEN_INT (1 << log));
6724 /* Put multiply first if it isn't already. */
6725 if (GET_CODE (XEXP (x, 1)) == MULT)
6727 rtx tmp = XEXP (x, 0);
6728 XEXP (x, 0) = XEXP (x, 1);
6729 XEXP (x, 1) = tmp;
6730 changed = 1;
6733 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6734 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6735 created by virtual register instantiation, register elimination, and
6736 similar optimizations. */
6737 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6739 changed = 1;
6740 x = gen_rtx_PLUS (Pmode,
6741 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6742 XEXP (XEXP (x, 1), 0)),
6743 XEXP (XEXP (x, 1), 1));
6746 /* Canonicalize
6747 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6748 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6749 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6750 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6751 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6752 && CONSTANT_P (XEXP (x, 1)))
6754 rtx constant;
6755 rtx other = NULL_RTX;
6757 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6759 constant = XEXP (x, 1);
6760 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6762 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6764 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6765 other = XEXP (x, 1);
6767 else
6768 constant = 0;
6770 if (constant)
6772 changed = 1;
6773 x = gen_rtx_PLUS (Pmode,
6774 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6775 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6776 plus_constant (other, INTVAL (constant)));
6780 if (changed && legitimate_address_p (mode, x, FALSE))
6781 return x;
6783 if (GET_CODE (XEXP (x, 0)) == MULT)
6785 changed = 1;
6786 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6789 if (GET_CODE (XEXP (x, 1)) == MULT)
6791 changed = 1;
6792 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6795 if (changed
6796 && GET_CODE (XEXP (x, 1)) == REG
6797 && GET_CODE (XEXP (x, 0)) == REG)
6798 return x;
6800 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6802 changed = 1;
6803 x = legitimize_pic_address (x, 0);
6806 if (changed && legitimate_address_p (mode, x, FALSE))
6807 return x;
6809 if (GET_CODE (XEXP (x, 0)) == REG)
6811 rtx temp = gen_reg_rtx (Pmode);
6812 rtx val = force_operand (XEXP (x, 1), temp);
6813 if (val != temp)
6814 emit_move_insn (temp, val);
6816 XEXP (x, 1) = temp;
6817 return x;
6820 else if (GET_CODE (XEXP (x, 1)) == REG)
6822 rtx temp = gen_reg_rtx (Pmode);
6823 rtx val = force_operand (XEXP (x, 0), temp);
6824 if (val != temp)
6825 emit_move_insn (temp, val);
6827 XEXP (x, 0) = temp;
6828 return x;
6832 return x;
6835 /* Print an integer constant expression in assembler syntax. Addition
6836 and subtraction are the only arithmetic that may appear in these
6837 expressions. FILE is the stdio stream to write to, X is the rtx, and
6838 CODE is the operand print code from the output string. */
6840 static void
6841 output_pic_addr_const (FILE *file, rtx x, int code)
6843 char buf[256];
6845 switch (GET_CODE (x))
6847 case PC:
6848 if (flag_pic)
6849 putc ('.', file);
6850 else
6851 abort ();
6852 break;
6854 case SYMBOL_REF:
6855 /* Mark the decl as referenced so that cgraph will output the function. */
6856 if (SYMBOL_REF_DECL (x))
6857 mark_decl_referenced (SYMBOL_REF_DECL (x));
6859 assemble_name (file, XSTR (x, 0));
6860 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6861 fputs ("@PLT", file);
6862 break;
6864 case LABEL_REF:
6865 x = XEXP (x, 0);
6866 /* FALLTHRU */
6867 case CODE_LABEL:
6868 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6869 assemble_name (asm_out_file, buf);
6870 break;
6872 case CONST_INT:
6873 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6874 break;
6876 case CONST:
6877 /* This used to output parentheses around the expression,
6878 but that does not work on the 386 (either ATT or BSD assembler). */
6879 output_pic_addr_const (file, XEXP (x, 0), code);
6880 break;
6882 case CONST_DOUBLE:
6883 if (GET_MODE (x) == VOIDmode)
6885 /* We can use %d if the number is <32 bits and positive. */
6886 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6887 fprintf (file, "0x%lx%08lx",
6888 (unsigned long) CONST_DOUBLE_HIGH (x),
6889 (unsigned long) CONST_DOUBLE_LOW (x));
6890 else
6891 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6893 else
6894 /* We can't handle floating point constants;
6895 PRINT_OPERAND must handle them. */
6896 output_operand_lossage ("floating constant misused");
6897 break;
6899 case PLUS:
6900 /* Some assemblers need integer constants to appear first. */
6901 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6903 output_pic_addr_const (file, XEXP (x, 0), code);
6904 putc ('+', file);
6905 output_pic_addr_const (file, XEXP (x, 1), code);
6907 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6909 output_pic_addr_const (file, XEXP (x, 1), code);
6910 putc ('+', file);
6911 output_pic_addr_const (file, XEXP (x, 0), code);
6913 else
6914 abort ();
6915 break;
6917 case MINUS:
6918 if (!TARGET_MACHO)
6919 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6920 output_pic_addr_const (file, XEXP (x, 0), code);
6921 putc ('-', file);
6922 output_pic_addr_const (file, XEXP (x, 1), code);
6923 if (!TARGET_MACHO)
6924 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6925 break;
6927 case UNSPEC:
6928 if (XVECLEN (x, 0) != 1)
6929 abort ();
6930 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6931 switch (XINT (x, 1))
6933 case UNSPEC_GOT:
6934 fputs ("@GOT", file);
6935 break;
6936 case UNSPEC_GOTOFF:
6937 fputs ("@GOTOFF", file);
6938 break;
6939 case UNSPEC_GOTPCREL:
6940 fputs ("@GOTPCREL(%rip)", file);
6941 break;
6942 case UNSPEC_GOTTPOFF:
6943 /* FIXME: This might be @TPOFF in Sun ld too. */
6944 fputs ("@GOTTPOFF", file);
6945 break;
6946 case UNSPEC_TPOFF:
6947 fputs ("@TPOFF", file);
6948 break;
6949 case UNSPEC_NTPOFF:
6950 if (TARGET_64BIT)
6951 fputs ("@TPOFF", file);
6952 else
6953 fputs ("@NTPOFF", file);
6954 break;
6955 case UNSPEC_DTPOFF:
6956 fputs ("@DTPOFF", file);
6957 break;
6958 case UNSPEC_GOTNTPOFF:
6959 if (TARGET_64BIT)
6960 fputs ("@GOTTPOFF(%rip)", file);
6961 else
6962 fputs ("@GOTNTPOFF", file);
6963 break;
6964 case UNSPEC_INDNTPOFF:
6965 fputs ("@INDNTPOFF", file);
6966 break;
6967 default:
6968 output_operand_lossage ("invalid UNSPEC as operand");
6969 break;
6971 break;
6973 default:
6974 output_operand_lossage ("invalid expression as operand");
6978 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6979 We need to handle our special PIC relocations. */
6981 void
6982 i386_dwarf_output_addr_const (FILE *file, rtx x)
6984 #ifdef ASM_QUAD
6985 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6986 #else
6987 if (TARGET_64BIT)
6988 abort ();
6989 fprintf (file, "%s", ASM_LONG);
6990 #endif
6991 if (flag_pic)
6992 output_pic_addr_const (file, x, '\0');
6993 else
6994 output_addr_const (file, x);
6995 fputc ('\n', file);
6998 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6999 We need to emit DTP-relative relocations. */
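/* Roughly, for a symbol "foo" this emits ".long foo@DTPOFF" when SIZE is 4
   and ".long foo@DTPOFF, 0" (zero-padding the upper half) when SIZE is 8.  */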
7001 void
7002 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7004 fputs (ASM_LONG, file);
7005 output_addr_const (file, x);
7006 fputs ("@DTPOFF", file);
7007 switch (size)
7009 case 4:
7010 break;
7011 case 8:
7012 fputs (", 0", file);
7013 break;
7014 default:
7015 abort ();
7019 /* In the name of slightly smaller debug output, and to cater to
7020 general assembler lossage, recognize PIC+GOTOFF and turn it back
7021 into a direct symbol reference. */
7023 static rtx
7024 ix86_delegitimize_address (rtx orig_x)
7026 rtx x = orig_x, y;
7028 if (GET_CODE (x) == MEM)
7029 x = XEXP (x, 0);
7031 if (TARGET_64BIT)
7033 if (GET_CODE (x) != CONST
7034 || GET_CODE (XEXP (x, 0)) != UNSPEC
7035 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7036 || GET_CODE (orig_x) != MEM)
7037 return orig_x;
7038 return XVECEXP (XEXP (x, 0), 0, 0);
7041 if (GET_CODE (x) != PLUS
7042 || GET_CODE (XEXP (x, 1)) != CONST)
7043 return orig_x;
7045 if (GET_CODE (XEXP (x, 0)) == REG
7046 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7047 /* %ebx + GOT/GOTOFF */
7048 y = NULL;
7049 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7051 /* %ebx + %reg * scale + GOT/GOTOFF */
7052 y = XEXP (x, 0);
7053 if (GET_CODE (XEXP (y, 0)) == REG
7054 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7055 y = XEXP (y, 1);
7056 else if (GET_CODE (XEXP (y, 1)) == REG
7057 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7058 y = XEXP (y, 0);
7059 else
7060 return orig_x;
7061 if (GET_CODE (y) != REG
7062 && GET_CODE (y) != MULT
7063 && GET_CODE (y) != ASHIFT)
7064 return orig_x;
7066 else
7067 return orig_x;
7069 x = XEXP (XEXP (x, 1), 0);
7070 if (GET_CODE (x) == UNSPEC
7071 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7072 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7074 if (y)
7075 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7076 return XVECEXP (x, 0, 0);
7079 if (GET_CODE (x) == PLUS
7080 && GET_CODE (XEXP (x, 0)) == UNSPEC
7081 && GET_CODE (XEXP (x, 1)) == CONST_INT
7082 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7083 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7084 && GET_CODE (orig_x) != MEM)))
7086 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7087 if (y)
7088 return gen_rtx_PLUS (Pmode, y, x);
7089 return x;
7092 return orig_x;
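/* Print to FILE the condition-code suffix (e, ne, g, a, ...) for CODE as
   interpreted in flags mode MODE. If REVERSE is nonzero the condition is
   reversed first; FP nonzero selects the spellings used for fcmov
   (e.g. "nbe" rather than "a").  */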
7095 static void
7096 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7097 int fp, FILE *file)
7099 const char *suffix;
7101 if (mode == CCFPmode || mode == CCFPUmode)
7103 enum rtx_code second_code, bypass_code;
7104 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7105 if (bypass_code != NIL || second_code != NIL)
7106 abort ();
7107 code = ix86_fp_compare_code_to_integer (code);
7108 mode = CCmode;
7110 if (reverse)
7111 code = reverse_condition (code);
7113 switch (code)
7115 case EQ:
7116 suffix = "e";
7117 break;
7118 case NE:
7119 suffix = "ne";
7120 break;
7121 case GT:
7122 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7123 abort ();
7124 suffix = "g";
7125 break;
7126 case GTU:
7127 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7128 Those same assemblers have the same but opposite lossage on cmov. */
7129 if (mode != CCmode)
7130 abort ();
7131 suffix = fp ? "nbe" : "a";
7132 break;
7133 case LT:
7134 if (mode == CCNOmode || mode == CCGOCmode)
7135 suffix = "s";
7136 else if (mode == CCmode || mode == CCGCmode)
7137 suffix = "l";
7138 else
7139 abort ();
7140 break;
7141 case LTU:
7142 if (mode != CCmode)
7143 abort ();
7144 suffix = "b";
7145 break;
7146 case GE:
7147 if (mode == CCNOmode || mode == CCGOCmode)
7148 suffix = "ns";
7149 else if (mode == CCmode || mode == CCGCmode)
7150 suffix = "ge";
7151 else
7152 abort ();
7153 break;
7154 case GEU:
7155 /* ??? As above. */
7156 if (mode != CCmode)
7157 abort ();
7158 suffix = fp ? "nb" : "ae";
7159 break;
7160 case LE:
7161 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7162 abort ();
7163 suffix = "le";
7164 break;
7165 case LEU:
7166 if (mode != CCmode)
7167 abort ();
7168 suffix = "be";
7169 break;
7170 case UNORDERED:
7171 suffix = fp ? "u" : "p";
7172 break;
7173 case ORDERED:
7174 suffix = fp ? "nu" : "np";
7175 break;
7176 default:
7177 abort ();
7179 fputs (suffix, file);
7182 /* Print the name of register X to FILE based on its machine mode and number.
7183 If CODE is 'w', pretend the mode is HImode.
7184 If CODE is 'b', pretend the mode is QImode.
7185 If CODE is 'k', pretend the mode is SImode.
7186 If CODE is 'q', pretend the mode is DImode.
7187 If CODE is 'h', pretend the reg is the `high' byte register.
7188 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
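/* For example, with operand 0 in register AX, %b0 prints %al, %w0 prints
   %ax, %k0 prints %eax and %q0 prints %rax; the AMD extended registers
   print as r8b/r8w/r8d/r8 and so on.  */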
7190 void
7191 print_reg (rtx x, int code, FILE *file)
7193 if (REGNO (x) == ARG_POINTER_REGNUM
7194 || REGNO (x) == FRAME_POINTER_REGNUM
7195 || REGNO (x) == FLAGS_REG
7196 || REGNO (x) == FPSR_REG)
7197 abort ();
7199 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7200 putc ('%', file);
7202 if (code == 'w' || MMX_REG_P (x))
7203 code = 2;
7204 else if (code == 'b')
7205 code = 1;
7206 else if (code == 'k')
7207 code = 4;
7208 else if (code == 'q')
7209 code = 8;
7210 else if (code == 'y')
7211 code = 3;
7212 else if (code == 'h')
7213 code = 0;
7214 else
7215 code = GET_MODE_SIZE (GET_MODE (x));
7217 /* Irritatingly, AMD extended registers use a different naming convention
7218 than the normal registers. */
7219 if (REX_INT_REG_P (x))
7221 if (!TARGET_64BIT)
7222 abort ();
7223 switch (code)
7225 case 0:
7226 error ("extended registers have no high halves");
7227 break;
7228 case 1:
7229 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7230 break;
7231 case 2:
7232 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7233 break;
7234 case 4:
7235 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7236 break;
7237 case 8:
7238 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7239 break;
7240 default:
7241 error ("unsupported operand size for extended register");
7242 break;
7244 return;
7246 switch (code)
7248 case 3:
7249 if (STACK_TOP_P (x))
7251 fputs ("st(0)", file);
7252 break;
7254 /* FALLTHRU */
7255 case 8:
7256 case 4:
7257 case 12:
7258 if (! ANY_FP_REG_P (x))
7259 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7260 /* FALLTHRU */
7261 case 16:
7262 case 2:
7263 normal:
7264 fputs (hi_reg_name[REGNO (x)], file);
7265 break;
7266 case 1:
7267 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7268 goto normal;
7269 fputs (qi_reg_name[REGNO (x)], file);
7270 break;
7271 case 0:
7272 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7273 goto normal;
7274 fputs (qi_high_reg_name[REGNO (x)], file);
7275 break;
7276 default:
7277 abort ();
7281 /* Locate some local-dynamic symbol still in use by this function
7282 so that we can print its name in some tls_local_dynamic_base
7283 pattern. */
7285 static const char *
7286 get_some_local_dynamic_name (void)
7288 rtx insn;
7290 if (cfun->machine->some_ld_name)
7291 return cfun->machine->some_ld_name;
7293 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7294 if (INSN_P (insn)
7295 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7296 return cfun->machine->some_ld_name;
7298 abort ();
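/* Callback for for_each_rtx, used by get_some_local_dynamic_name: when *PX
   is a SYMBOL_REF for a local-dynamic TLS symbol, remember its name in
   cfun->machine->some_ld_name and return 1 to stop the walk.  */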
7301 static int
7302 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7304 rtx x = *px;
7306 if (GET_CODE (x) == SYMBOL_REF
7307 && local_dynamic_symbolic_operand (x, Pmode))
7309 cfun->machine->some_ld_name = XSTR (x, 0);
7310 return 1;
7313 return 0;
7316 /* Meaning of CODE:
7317 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7318 C -- print opcode suffix for set/cmov insn.
7319 c -- like C, but print reversed condition
7320 F,f -- likewise, but for floating-point.
7321 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7322 otherwise nothing
7323 R -- print the prefix for register names.
7324 z -- print the opcode suffix for the size of the current operand.
7325 * -- print a star (in certain assembler syntax)
7326 A -- print an absolute memory reference.
7327 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7328 s -- print a shift double count, followed by the assembler's argument
7329 delimiter.
7330 b -- print the QImode name of the register for the indicated operand.
7331 %b0 would print %al if operands[0] is reg 0.
7332 w -- likewise, print the HImode name of the register.
7333 k -- likewise, print the SImode name of the register.
7334 q -- likewise, print the DImode name of the register.
7335 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7336 y -- print "st(0)" instead of "st" as a register.
7337 D -- print condition for SSE cmp instruction.
7338 P -- if PIC, print an @PLT suffix.
7339 X -- don't print any sort of PIC '@' suffix for a symbol.
7340 & -- print some in-use local-dynamic symbol name.
7343 void
7344 print_operand (FILE *file, rtx x, int code)
7346 if (code)
7348 switch (code)
7350 case '*':
7351 if (ASSEMBLER_DIALECT == ASM_ATT)
7352 putc ('*', file);
7353 return;
7355 case '&':
7356 assemble_name (file, get_some_local_dynamic_name ());
7357 return;
7359 case 'A':
7360 if (ASSEMBLER_DIALECT == ASM_ATT)
7361 putc ('*', file);
7362 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7364 /* Intel syntax. For absolute addresses, registers should not
7365 be surrounded by brackets. */
7366 if (GET_CODE (x) != REG)
7368 putc ('[', file);
7369 PRINT_OPERAND (file, x, 0);
7370 putc (']', file);
7371 return;
7374 else
7375 abort ();
7377 PRINT_OPERAND (file, x, 0);
7378 return;
7381 case 'L':
7382 if (ASSEMBLER_DIALECT == ASM_ATT)
7383 putc ('l', file);
7384 return;
7386 case 'W':
7387 if (ASSEMBLER_DIALECT == ASM_ATT)
7388 putc ('w', file);
7389 return;
7391 case 'B':
7392 if (ASSEMBLER_DIALECT == ASM_ATT)
7393 putc ('b', file);
7394 return;
7396 case 'Q':
7397 if (ASSEMBLER_DIALECT == ASM_ATT)
7398 putc ('l', file);
7399 return;
7401 case 'S':
7402 if (ASSEMBLER_DIALECT == ASM_ATT)
7403 putc ('s', file);
7404 return;
7406 case 'T':
7407 if (ASSEMBLER_DIALECT == ASM_ATT)
7408 putc ('t', file);
7409 return;
7411 case 'z':
7412 /* 387 opcodes don't get size suffixes if the operands are
7413 registers. */
7414 if (STACK_REG_P (x))
7415 return;
7417 /* Likewise if using Intel opcodes. */
7418 if (ASSEMBLER_DIALECT == ASM_INTEL)
7419 return;
7421 /* Derive the opcode suffix from the size of the operand. */
7422 switch (GET_MODE_SIZE (GET_MODE (x)))
7424 case 2:
7425 #ifdef HAVE_GAS_FILDS_FISTS
7426 putc ('s', file);
7427 #endif
7428 return;
7430 case 4:
7431 if (GET_MODE (x) == SFmode)
7433 putc ('s', file);
7434 return;
7436 else
7437 putc ('l', file);
7438 return;
7440 case 12:
7441 case 16:
7442 putc ('t', file);
7443 return;
7445 case 8:
7446 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7448 #ifdef GAS_MNEMONICS
7449 putc ('q', file);
7450 #else
7451 putc ('l', file);
7452 putc ('l', file);
7453 #endif
7455 else
7456 putc ('l', file);
7457 return;
7459 default:
7460 abort ();
7463 case 'b':
7464 case 'w':
7465 case 'k':
7466 case 'q':
7467 case 'h':
7468 case 'y':
7469 case 'X':
7470 case 'P':
7471 break;
7473 case 's':
7474 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7476 PRINT_OPERAND (file, x, 0);
7477 putc (',', file);
7479 return;
7481 case 'D':
7482 /* A little bit of braindamage here. The SSE compare instructions
7483 use completely different names for the comparisons than the
7484 fp conditional moves do. */
7485 switch (GET_CODE (x))
7487 case EQ:
7488 case UNEQ:
7489 fputs ("eq", file);
7490 break;
7491 case LT:
7492 case UNLT:
7493 fputs ("lt", file);
7494 break;
7495 case LE:
7496 case UNLE:
7497 fputs ("le", file);
7498 break;
7499 case UNORDERED:
7500 fputs ("unord", file);
7501 break;
7502 case NE:
7503 case LTGT:
7504 fputs ("neq", file);
7505 break;
7506 case UNGE:
7507 case GE:
7508 fputs ("nlt", file);
7509 break;
7510 case UNGT:
7511 case GT:
7512 fputs ("nle", file);
7513 break;
7514 case ORDERED:
7515 fputs ("ord", file);
7516 break;
7517 default:
7518 abort ();
7519 break;
7521 return;
7522 case 'O':
7523 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7524 if (ASSEMBLER_DIALECT == ASM_ATT)
7526 switch (GET_MODE (x))
7528 case HImode: putc ('w', file); break;
7529 case SImode:
7530 case SFmode: putc ('l', file); break;
7531 case DImode:
7532 case DFmode: putc ('q', file); break;
7533 default: abort ();
7535 putc ('.', file);
7537 #endif
7538 return;
7539 case 'C':
7540 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7541 return;
7542 case 'F':
7543 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7544 if (ASSEMBLER_DIALECT == ASM_ATT)
7545 putc ('.', file);
7546 #endif
7547 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7548 return;
7550 /* Like above, but reverse condition */
7551 case 'c':
7552 /* Check to see if argument to %c is really a constant
7553 and not a condition code which needs to be reversed. */
7554 if (!COMPARISON_P (x))
7556 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7557 return;
7559 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7560 return;
7561 case 'f':
7562 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7563 if (ASSEMBLER_DIALECT == ASM_ATT)
7564 putc ('.', file);
7565 #endif
7566 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7567 return;
7568 case '+':
7570 rtx x;
7572 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7573 return;
7575 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7576 if (x)
7578 int pred_val = INTVAL (XEXP (x, 0));
7580 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7581 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7583 int taken = pred_val > REG_BR_PROB_BASE / 2;
7584 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7586 /* Emit hints only in cases where the default branch prediction
7587 heuristics would fail. */
7588 if (taken != cputaken)
7590 /* We use the 3e (DS) prefix for taken branches and
7591 the 2e (CS) prefix for not-taken branches. */
7592 if (taken)
7593 fputs ("ds ; ", file);
7594 else
7595 fputs ("cs ; ", file);
7599 return;
7601 default:
7602 output_operand_lossage ("invalid operand code `%c'", code);
7606 if (GET_CODE (x) == REG)
7607 print_reg (x, code, file);
7609 else if (GET_CODE (x) == MEM)
7611 /* No `byte ptr' prefix for call instructions. */
7612 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7614 const char * size;
7615 switch (GET_MODE_SIZE (GET_MODE (x)))
7617 case 1: size = "BYTE"; break;
7618 case 2: size = "WORD"; break;
7619 case 4: size = "DWORD"; break;
7620 case 8: size = "QWORD"; break;
7621 case 12: size = "XWORD"; break;
7622 case 16: size = "XMMWORD"; break;
7623 default:
7624 abort ();
7627 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7628 if (code == 'b')
7629 size = "BYTE";
7630 else if (code == 'w')
7631 size = "WORD";
7632 else if (code == 'k')
7633 size = "DWORD";
7635 fputs (size, file);
7636 fputs (" PTR ", file);
7639 x = XEXP (x, 0);
7640 /* Avoid (%rip) for call operands. */
7641 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7642 && GET_CODE (x) != CONST_INT)
7643 output_addr_const (file, x);
7644 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7645 output_operand_lossage ("invalid constraints for operand");
7646 else
7647 output_address (x);
7650 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7652 REAL_VALUE_TYPE r;
7653 long l;
7655 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7656 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7658 if (ASSEMBLER_DIALECT == ASM_ATT)
7659 putc ('$', file);
7660 fprintf (file, "0x%08lx", l);
7663 /* These float cases don't actually occur as immediate operands. */
7664 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7666 char dstr[30];
7668 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7669 fprintf (file, "%s", dstr);
7672 else if (GET_CODE (x) == CONST_DOUBLE
7673 && GET_MODE (x) == XFmode)
7675 char dstr[30];
7677 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7678 fprintf (file, "%s", dstr);
7681 else
7683 if (code != 'P')
7685 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7687 if (ASSEMBLER_DIALECT == ASM_ATT)
7688 putc ('$', file);
7690 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7691 || GET_CODE (x) == LABEL_REF)
7693 if (ASSEMBLER_DIALECT == ASM_ATT)
7694 putc ('$', file);
7695 else
7696 fputs ("OFFSET FLAT:", file);
7699 if (GET_CODE (x) == CONST_INT)
7700 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7701 else if (flag_pic)
7702 output_pic_addr_const (file, x, code);
7703 else
7704 output_addr_const (file, x);
7708 /* Print a memory operand whose address is ADDR. */
7710 void
7711 print_operand_address (FILE *file, rtx addr)
7713 struct ix86_address parts;
7714 rtx base, index, disp;
7715 int scale;
7717 if (! ix86_decompose_address (addr, &parts))
7718 abort ();
7720 base = parts.base;
7721 index = parts.index;
7722 disp = parts.disp;
7723 scale = parts.scale;
7725 switch (parts.seg)
7727 case SEG_DEFAULT:
7728 break;
7729 case SEG_FS:
7730 case SEG_GS:
7731 if (USER_LABEL_PREFIX[0] == 0)
7732 putc ('%', file);
7733 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7734 break;
7735 default:
7736 abort ();
7739 if (!base && !index)
7741 /* A displacement-only address requires special attention. */
7743 if (GET_CODE (disp) == CONST_INT)
7745 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7747 if (USER_LABEL_PREFIX[0] == 0)
7748 putc ('%', file);
7749 fputs ("ds:", file);
7751 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7753 else if (flag_pic)
7754 output_pic_addr_const (file, disp, 0);
7755 else
7756 output_addr_const (file, disp);
7758 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
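/* E.g. (illustrative): "movl sym(%rip), %eax" encodes the address as a
   32-bit displacement from RIP, while the absolute disp32 form needs an
   extra SIB byte in 64-bit mode, hence the one-byte saving. */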
7759 if (TARGET_64BIT
7760 && ((GET_CODE (disp) == SYMBOL_REF
7761 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7762 || GET_CODE (disp) == LABEL_REF
7763 || (GET_CODE (disp) == CONST
7764 && GET_CODE (XEXP (disp, 0)) == PLUS
7765 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7766 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7767 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7768 fputs ("(%rip)", file);
7770 else
7772 if (ASSEMBLER_DIALECT == ASM_ATT)
7774 if (disp)
7776 if (flag_pic)
7777 output_pic_addr_const (file, disp, 0);
7778 else if (GET_CODE (disp) == LABEL_REF)
7779 output_asm_label (disp);
7780 else
7781 output_addr_const (file, disp);
7784 putc ('(', file);
7785 if (base)
7786 print_reg (base, 0, file);
7787 if (index)
7789 putc (',', file);
7790 print_reg (index, 0, file);
7791 if (scale != 1)
7792 fprintf (file, ",%d", scale);
7794 putc (')', file);
7796 else
7798 rtx offset = NULL_RTX;
7800 if (disp)
7802 /* Pull out the offset of a symbol; print any symbol itself. */
7803 if (GET_CODE (disp) == CONST
7804 && GET_CODE (XEXP (disp, 0)) == PLUS
7805 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7807 offset = XEXP (XEXP (disp, 0), 1);
7808 disp = gen_rtx_CONST (VOIDmode,
7809 XEXP (XEXP (disp, 0), 0));
7812 if (flag_pic)
7813 output_pic_addr_const (file, disp, 0);
7814 else if (GET_CODE (disp) == LABEL_REF)
7815 output_asm_label (disp);
7816 else if (GET_CODE (disp) == CONST_INT)
7817 offset = disp;
7818 else
7819 output_addr_const (file, disp);
7822 putc ('[', file);
7823 if (base)
7825 print_reg (base, 0, file);
7826 if (offset)
7828 if (INTVAL (offset) >= 0)
7829 putc ('+', file);
7830 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7833 else if (offset)
7834 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7835 else
7836 putc ('0', file);
7838 if (index)
7840 putc ('+', file);
7841 print_reg (index, 0, file);
7842 if (scale != 1)
7843 fprintf (file, "*%d", scale);
7845 putc (']', file);
7850 bool
7851 output_addr_const_extra (FILE *file, rtx x)
7853 rtx op;
7855 if (GET_CODE (x) != UNSPEC)
7856 return false;
7858 op = XVECEXP (x, 0, 0);
7859 switch (XINT (x, 1))
7861 case UNSPEC_GOTTPOFF:
7862 output_addr_const (file, op);
7863 /* FIXME: This might be @TPOFF in Sun ld. */
7864 fputs ("@GOTTPOFF", file);
7865 break;
7866 case UNSPEC_TPOFF:
7867 output_addr_const (file, op);
7868 fputs ("@TPOFF", file);
7869 break;
7870 case UNSPEC_NTPOFF:
7871 output_addr_const (file, op);
7872 if (TARGET_64BIT)
7873 fputs ("@TPOFF", file);
7874 else
7875 fputs ("@NTPOFF", file);
7876 break;
7877 case UNSPEC_DTPOFF:
7878 output_addr_const (file, op);
7879 fputs ("@DTPOFF", file);
7880 break;
7881 case UNSPEC_GOTNTPOFF:
7882 output_addr_const (file, op);
7883 if (TARGET_64BIT)
7884 fputs ("@GOTTPOFF(%rip)", file);
7885 else
7886 fputs ("@GOTNTPOFF", file);
7887 break;
7888 case UNSPEC_INDNTPOFF:
7889 output_addr_const (file, op);
7890 fputs ("@INDNTPOFF", file);
7891 break;
7893 default:
7894 return false;
7897 return true;
7900 /* Split one or more DImode RTL references into pairs of SImode
7901 references. The RTL can be REG, offsettable MEM, integer constant, or
7902 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7903 split and "num" is its length. lo_half and hi_half are output arrays
7904 that parallel "operands". */
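/* For example (illustrative, little-endian layout): a DImode register
   operand splits into SImode subregs at byte offsets 0 (low half) and 4
   (high half), and an offsettable MEM splits into the same MEM adjusted
   to SImode at offsets 0 and 4. */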
7906 void
7907 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7909 while (num--)
7911 rtx op = operands[num];
7913 /* simplify_subreg refuses to split volatile memory addresses,
7914 but we still have to handle them. */
7915 if (GET_CODE (op) == MEM)
7917 lo_half[num] = adjust_address (op, SImode, 0);
7918 hi_half[num] = adjust_address (op, SImode, 4);
7920 else
7922 lo_half[num] = simplify_gen_subreg (SImode, op,
7923 GET_MODE (op) == VOIDmode
7924 ? DImode : GET_MODE (op), 0);
7925 hi_half[num] = simplify_gen_subreg (SImode, op,
7926 GET_MODE (op) == VOIDmode
7927 ? DImode : GET_MODE (op), 4);
7931 /* Split one or more TImode RTL references into pairs of DImode
7932 references. The RTL can be REG, offsettable MEM, integer constant, or
7933 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7934 split and "num" is its length. lo_half and hi_half are output arrays
7935 that parallel "operands". */
7937 void
7938 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7940 while (num--)
7942 rtx op = operands[num];
7944 /* simplify_subreg refuses to split volatile memory addresses, but we
7945 still have to handle them. */
7946 if (GET_CODE (op) == MEM)
7948 lo_half[num] = adjust_address (op, DImode, 0);
7949 hi_half[num] = adjust_address (op, DImode, 8);
7951 else
7953 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7954 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7959 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7960 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7961 is the expression of the binary operation. The output may either be
7962 emitted here, or returned to the caller, like all output_* functions.
7964 There is no guarantee that the operands are the same mode, as they
7965 might be within FLOAT or FLOAT_EXTEND expressions. */
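/* E.g. (illustrative): a PLUS with SFmode operands in SSE registers makes
   this routine return "addss\t{%2, %0|%0, %2}", while x87 operands get one
   of the fadd/fsub/fmul/fdiv forms built up below. */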
7967 #ifndef SYSV386_COMPAT
7968 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7969 wants to fix the assemblers because that causes incompatibility
7970 with gcc. No-one wants to fix gcc because that causes
7971 incompatibility with assemblers... You can use the option of
7972 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7973 #define SYSV386_COMPAT 1
7974 #endif
7976 const char *
7977 output_387_binary_op (rtx insn, rtx *operands)
7979 static char buf[30];
7980 const char *p;
7981 const char *ssep;
7982 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7984 #ifdef ENABLE_CHECKING
7985 /* Even if we do not want to check the inputs, this documents the
7986 input constraints, which helps in understanding the following code. */
7987 if (STACK_REG_P (operands[0])
7988 && ((REG_P (operands[1])
7989 && REGNO (operands[0]) == REGNO (operands[1])
7990 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7991 || (REG_P (operands[2])
7992 && REGNO (operands[0]) == REGNO (operands[2])
7993 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7994 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7995 ; /* ok */
7996 else if (!is_sse)
7997 abort ();
7998 #endif
8000 switch (GET_CODE (operands[3]))
8002 case PLUS:
8003 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8004 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8005 p = "fiadd";
8006 else
8007 p = "fadd";
8008 ssep = "add";
8009 break;
8011 case MINUS:
8012 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8013 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8014 p = "fisub";
8015 else
8016 p = "fsub";
8017 ssep = "sub";
8018 break;
8020 case MULT:
8021 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8022 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8023 p = "fimul";
8024 else
8025 p = "fmul";
8026 ssep = "mul";
8027 break;
8029 case DIV:
8030 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8031 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8032 p = "fidiv";
8033 else
8034 p = "fdiv";
8035 ssep = "div";
8036 break;
8038 default:
8039 abort ();
8042 if (is_sse)
8044 strcpy (buf, ssep);
8045 if (GET_MODE (operands[0]) == SFmode)
8046 strcat (buf, "ss\t{%2, %0|%0, %2}");
8047 else
8048 strcat (buf, "sd\t{%2, %0|%0, %2}");
8049 return buf;
8051 strcpy (buf, p);
8053 switch (GET_CODE (operands[3]))
8055 case MULT:
8056 case PLUS:
8057 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8059 rtx temp = operands[2];
8060 operands[2] = operands[1];
8061 operands[1] = temp;
8064 /* We know operands[0] == operands[1]. */
8066 if (GET_CODE (operands[2]) == MEM)
8068 p = "%z2\t%2";
8069 break;
8072 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8074 if (STACK_TOP_P (operands[0]))
8075 /* How is it that we are storing to a dead operand[2]?
8076 Well, presumably operands[1] is dead too. We can't
8077 store the result to st(0) as st(0) gets popped on this
8078 instruction. Instead store to operands[2] (which I
8079 think has to be st(1)). st(1) will be popped later.
8080 gcc <= 2.8.1 didn't have this check and generated
8081 assembly code that the Unixware assembler rejected. */
8082 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8083 else
8084 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8085 break;
8088 if (STACK_TOP_P (operands[0]))
8089 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8090 else
8091 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8092 break;
8094 case MINUS:
8095 case DIV:
8096 if (GET_CODE (operands[1]) == MEM)
8098 p = "r%z1\t%1";
8099 break;
8102 if (GET_CODE (operands[2]) == MEM)
8104 p = "%z2\t%2";
8105 break;
8108 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8110 #if SYSV386_COMPAT
8111 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8112 derived assemblers, confusingly reverse the direction of
8113 the operation for fsub{r} and fdiv{r} when the
8114 destination register is not st(0). The Intel assembler
8115 doesn't have this brain damage. Read !SYSV386_COMPAT to
8116 figure out what the hardware really does. */
8117 if (STACK_TOP_P (operands[0]))
8118 p = "{p\t%0, %2|rp\t%2, %0}";
8119 else
8120 p = "{rp\t%2, %0|p\t%0, %2}";
8121 #else
8122 if (STACK_TOP_P (operands[0]))
8123 /* As above for fmul/fadd, we can't store to st(0). */
8124 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8125 else
8126 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8127 #endif
8128 break;
8131 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8133 #if SYSV386_COMPAT
8134 if (STACK_TOP_P (operands[0]))
8135 p = "{rp\t%0, %1|p\t%1, %0}";
8136 else
8137 p = "{p\t%1, %0|rp\t%0, %1}";
8138 #else
8139 if (STACK_TOP_P (operands[0]))
8140 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8141 else
8142 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8143 #endif
8144 break;
8147 if (STACK_TOP_P (operands[0]))
8149 if (STACK_TOP_P (operands[1]))
8150 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8151 else
8152 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8153 break;
8155 else if (STACK_TOP_P (operands[1]))
8157 #if SYSV386_COMPAT
8158 p = "{\t%1, %0|r\t%0, %1}";
8159 #else
8160 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8161 #endif
8163 else
8165 #if SYSV386_COMPAT
8166 p = "{r\t%2, %0|\t%0, %2}";
8167 #else
8168 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8169 #endif
8171 break;
8173 default:
8174 abort ();
8177 strcat (buf, p);
8178 return buf;
8181 /* Output code to initialize the control word copies used by the
8182 trunc?f?i patterns. NORMAL is set to the current control word, while
8183 ROUND_DOWN is set to a control word rounding downwards. */
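/* The expansion is roughly (illustrative): fnstcw into NORMAL, copy it to
   a register, OR in 0xc00 (both RC bits set, i.e. round towards zero,
   which is what the trunc?f?i patterns want), and store the result into
   ROUND_DOWN. */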
8184 void
8185 emit_i387_cw_initialization (rtx normal, rtx round_down)
8187 rtx reg = gen_reg_rtx (HImode);
8189 emit_insn (gen_x86_fnstcw_1 (normal));
8190 emit_move_insn (reg, normal);
8191 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8192 && !TARGET_64BIT)
8193 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8194 else
8195 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8196 emit_move_insn (round_down, reg);
8199 /* Output code for INSN to convert a float to a signed int. OPERANDS
8200 are the insn operands. The output may be [HSD]Imode and the input
8201 operand may be [SDX]Fmode. */
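/* The emitted sequence is roughly (illustrative): "fldcw %3" to install
   the truncating control word, then "fistp" (or "fist" when the value
   must stay on the x87 stack), then "fldcw %2" to restore the original
   control word. */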
8203 const char *
8204 output_fix_trunc (rtx insn, rtx *operands)
8206 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8207 int dimode_p = GET_MODE (operands[0]) == DImode;
8209 /* Jump through a hoop or two for DImode, since the hardware has no
8210 non-popping instruction. We used to do this a different way, but
8211 that was somewhat fragile and broke with post-reload splitters. */
8212 if (dimode_p && !stack_top_dies)
8213 output_asm_insn ("fld\t%y1", operands);
8215 if (!STACK_TOP_P (operands[1]))
8216 abort ();
8218 if (GET_CODE (operands[0]) != MEM)
8219 abort ();
8221 output_asm_insn ("fldcw\t%3", operands);
8222 if (stack_top_dies || dimode_p)
8223 output_asm_insn ("fistp%z0\t%0", operands);
8224 else
8225 output_asm_insn ("fist%z0\t%0", operands);
8226 output_asm_insn ("fldcw\t%2", operands);
8228 return "";
8231 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8232 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8233 when fucom should be used. */
8235 const char *
8236 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8238 int stack_top_dies;
8239 rtx cmp_op0 = operands[0];
8240 rtx cmp_op1 = operands[1];
8241 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8243 if (eflags_p == 2)
8245 cmp_op0 = cmp_op1;
8246 cmp_op1 = operands[2];
8248 if (is_sse)
8250 if (GET_MODE (operands[0]) == SFmode)
8251 if (unordered_p)
8252 return "ucomiss\t{%1, %0|%0, %1}";
8253 else
8254 return "comiss\t{%1, %0|%0, %1}";
8255 else
8256 if (unordered_p)
8257 return "ucomisd\t{%1, %0|%0, %1}";
8258 else
8259 return "comisd\t{%1, %0|%0, %1}";
8262 if (! STACK_TOP_P (cmp_op0))
8263 abort ();
8265 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8267 if (STACK_REG_P (cmp_op1)
8268 && stack_top_dies
8269 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8270 && REGNO (cmp_op1) != FIRST_STACK_REG)
8272 /* If the top of the 387 stack dies, and the other operand
8273 is also a stack register that dies, then this must be a
8274 `fcompp' float compare. */
8276 if (eflags_p == 1)
8278 /* There is no double popping fcomi variant. Fortunately,
8279 eflags is immune from the fstp's cc clobbering. */
8280 if (unordered_p)
8281 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8282 else
8283 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8284 return "fstp\t%y0";
8286 else
8288 if (eflags_p == 2)
8290 if (unordered_p)
8291 return "fucompp\n\tfnstsw\t%0";
8292 else
8293 return "fcompp\n\tfnstsw\t%0";
8295 else
8297 if (unordered_p)
8298 return "fucompp";
8299 else
8300 return "fcompp";
8304 else
8306 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8308 static const char * const alt[24] =
8310 "fcom%z1\t%y1",
8311 "fcomp%z1\t%y1",
8312 "fucom%z1\t%y1",
8313 "fucomp%z1\t%y1",
8315 "ficom%z1\t%y1",
8316 "ficomp%z1\t%y1",
8317 NULL,
8318 NULL,
8320 "fcomi\t{%y1, %0|%0, %y1}",
8321 "fcomip\t{%y1, %0|%0, %y1}",
8322 "fucomi\t{%y1, %0|%0, %y1}",
8323 "fucomip\t{%y1, %0|%0, %y1}",
8325 NULL,
8326 NULL,
8327 NULL,
8328 NULL,
8330 "fcom%z2\t%y2\n\tfnstsw\t%0",
8331 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8332 "fucom%z2\t%y2\n\tfnstsw\t%0",
8333 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8335 "ficom%z2\t%y2\n\tfnstsw\t%0",
8336 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8337 NULL,
8338 NULL
8341 int mask;
8342 const char *ret;
8344 mask = eflags_p << 3;
8345 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8346 mask |= unordered_p << 1;
8347 mask |= stack_top_dies;
8349 if (mask >= 24)
8350 abort ();
8351 ret = alt[mask];
8352 if (ret == NULL)
8353 abort ();
8355 return ret;
8359 void
8360 ix86_output_addr_vec_elt (FILE *file, int value)
8362 const char *directive = ASM_LONG;
8364 if (TARGET_64BIT)
8366 #ifdef ASM_QUAD
8367 directive = ASM_QUAD;
8368 #else
8369 abort ();
8370 #endif
8373 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8376 void
8377 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8379 if (TARGET_64BIT)
8380 fprintf (file, "%s%s%d-%s%d\n",
8381 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8382 else if (HAVE_AS_GOTOFF_IN_DATA)
8383 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8384 #if TARGET_MACHO
8385 else if (TARGET_MACHO)
8387 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8388 machopic_output_function_base_name (file);
8389 fprintf(file, "\n");
8391 #endif
8392 else
8393 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8394 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8397 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8398 for the target. */
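/* E.g. (illustrative): clearing %eax normally becomes "xorl %eax, %eax"
   with an explicit flags clobber; when TARGET_USE_MOV0 holds and we are
   not optimizing for size, "movl $0, %eax" is emitted instead. */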
8400 void
8401 ix86_expand_clear (rtx dest)
8403 rtx tmp;
8405 /* We play register width games, which are only valid after reload. */
8406 if (!reload_completed)
8407 abort ();
8409 /* Avoid HImode and its attendant prefix byte. */
8410 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8411 dest = gen_rtx_REG (SImode, REGNO (dest));
8413 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8415 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8416 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8418 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8419 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8422 emit_insn (tmp);
8425 /* X is an unchanging MEM. If it is a constant pool reference, return
8426 the constant pool rtx, else NULL. */
8428 static rtx
8429 maybe_get_pool_constant (rtx x)
8431 x = ix86_delegitimize_address (XEXP (x, 0));
8433 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8434 return get_pool_constant (x);
8436 return NULL_RTX;
8439 void
8440 ix86_expand_move (enum machine_mode mode, rtx operands[])
8442 int strict = (reload_in_progress || reload_completed);
8443 rtx op0, op1;
8444 enum tls_model model;
8446 op0 = operands[0];
8447 op1 = operands[1];
8449 model = tls_symbolic_operand (op1, Pmode);
8450 if (model)
8452 op1 = legitimize_tls_address (op1, model, true);
8453 op1 = force_operand (op1, op0);
8454 if (op1 == op0)
8455 return;
8458 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8460 #if TARGET_MACHO
8461 if (MACHOPIC_PURE)
8463 rtx temp = ((reload_in_progress
8464 || ((op0 && GET_CODE (op0) == REG)
8465 && mode == Pmode))
8466 ? op0 : gen_reg_rtx (Pmode));
8467 op1 = machopic_indirect_data_reference (op1, temp);
8468 op1 = machopic_legitimize_pic_address (op1, mode,
8469 temp == op1 ? 0 : temp);
8471 else if (MACHOPIC_INDIRECT)
8472 op1 = machopic_indirect_data_reference (op1, 0);
8473 if (op0 == op1)
8474 return;
8475 #else
8476 if (GET_CODE (op0) == MEM)
8477 op1 = force_reg (Pmode, op1);
8478 else
8479 op1 = legitimize_address (op1, op1, Pmode);
8480 #endif /* TARGET_MACHO */
8482 else
8484 if (GET_CODE (op0) == MEM
8485 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8486 || !push_operand (op0, mode))
8487 && GET_CODE (op1) == MEM)
8488 op1 = force_reg (mode, op1);
8490 if (push_operand (op0, mode)
8491 && ! general_no_elim_operand (op1, mode))
8492 op1 = copy_to_mode_reg (mode, op1);
8494 /* Force large constants in 64-bit compilation into a register
8495 to get them CSEed. */
8496 if (TARGET_64BIT && mode == DImode
8497 && immediate_operand (op1, mode)
8498 && !x86_64_zero_extended_value (op1)
8499 && !register_operand (op0, mode)
8500 && optimize && !reload_completed && !reload_in_progress)
8501 op1 = copy_to_mode_reg (mode, op1);
8503 if (FLOAT_MODE_P (mode))
8505 /* If we are loading a floating point constant to a register,
8506 force the value to memory now, since we'll get better code
8507 out of the back end. */
8509 if (strict)
8511 else if (GET_CODE (op1) == CONST_DOUBLE)
8513 op1 = validize_mem (force_const_mem (mode, op1));
8514 if (!register_operand (op0, mode))
8516 rtx temp = gen_reg_rtx (mode);
8517 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8518 emit_move_insn (op0, temp);
8519 return;
8525 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8528 void
8529 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8531 /* Force constants other than zero into memory. We do not know how
8532 the instructions used to build constants modify the upper 64 bits
8533 of the register; once we have that information we may be able
8534 to handle some of them more efficiently. */
8535 if ((reload_in_progress | reload_completed) == 0
8536 && register_operand (operands[0], mode)
8537 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8538 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8540 /* Make operand1 a register if it isn't already. */
8541 if (!no_new_pseudos
8542 && !register_operand (operands[0], mode)
8543 && !register_operand (operands[1], mode))
8545 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8546 emit_move_insn (operands[0], temp);
8547 return;
8550 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8553 /* Attempt to expand a binary operator. Make the expansion closer to the
8554 actual machine than just general_operand, which would allow 3 separate
8555 memory references (one output, two input) in a single insn. */
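/* E.g. (illustrative): an add whose two source operands are both MEMs has
   one of them forced into a register below, so that the insn finally
   matches what the machine instruction can actually encode. */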
8557 void
8558 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8559 rtx operands[])
8561 int matching_memory;
8562 rtx src1, src2, dst, op, clob;
8564 dst = operands[0];
8565 src1 = operands[1];
8566 src2 = operands[2];
8568 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8569 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8570 && (rtx_equal_p (dst, src2)
8571 || immediate_operand (src1, mode)))
8573 rtx temp = src1;
8574 src1 = src2;
8575 src2 = temp;
8578 /* If the destination is memory, and we do not have matching source
8579 operands, do things in registers. */
8580 matching_memory = 0;
8581 if (GET_CODE (dst) == MEM)
8583 if (rtx_equal_p (dst, src1))
8584 matching_memory = 1;
8585 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8586 && rtx_equal_p (dst, src2))
8587 matching_memory = 2;
8588 else
8589 dst = gen_reg_rtx (mode);
8592 /* Both source operands cannot be in memory. */
8593 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8595 if (matching_memory != 2)
8596 src2 = force_reg (mode, src2);
8597 else
8598 src1 = force_reg (mode, src1);
8601 /* If the operation is not commutative, source 1 cannot be a constant
8602 or non-matching memory. */
8603 if ((CONSTANT_P (src1)
8604 || (!matching_memory && GET_CODE (src1) == MEM))
8605 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8606 src1 = force_reg (mode, src1);
8608 /* If optimizing, copy to regs to improve CSE */
8609 if (optimize && ! no_new_pseudos)
8611 if (GET_CODE (dst) == MEM)
8612 dst = gen_reg_rtx (mode);
8613 if (GET_CODE (src1) == MEM)
8614 src1 = force_reg (mode, src1);
8615 if (GET_CODE (src2) == MEM)
8616 src2 = force_reg (mode, src2);
8619 /* Emit the instruction. */
8621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8622 if (reload_in_progress)
8624 /* Reload doesn't know about the flags register, and doesn't know that
8625 it doesn't want to clobber it. We can only do this with PLUS. */
8626 if (code != PLUS)
8627 abort ();
8628 emit_insn (op);
8630 else
8632 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8633 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8636 /* Fix up the destination if needed. */
8637 if (dst != operands[0])
8638 emit_move_insn (operands[0], dst);
8641 /* Return TRUE or FALSE depending on whether the binary operator meets the
8642 appropriate constraints. */
8645 ix86_binary_operator_ok (enum rtx_code code,
8646 enum machine_mode mode ATTRIBUTE_UNUSED,
8647 rtx operands[3])
8649 /* Both source operands cannot be in memory. */
8650 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8651 return 0;
8652 /* If the operation is not commutative, source 1 cannot be a constant. */
8653 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8654 return 0;
8655 /* If the destination is memory, we must have a matching source operand. */
8656 if (GET_CODE (operands[0]) == MEM
8657 && ! (rtx_equal_p (operands[0], operands[1])
8658 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8659 && rtx_equal_p (operands[0], operands[2]))))
8660 return 0;
8661 /* If the operation is not commutative and source 1 is memory, we must
8662 have a matching destination. */
8663 if (GET_CODE (operands[1]) == MEM
8664 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8665 && ! rtx_equal_p (operands[0], operands[1]))
8666 return 0;
8667 return 1;
8670 /* Attempt to expand a unary operator. Make the expansion closer to the
8671 actual machine than just general_operand, which would allow 2 separate
8672 memory references (one output, one input) in a single insn. */
8674 void
8675 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8676 rtx operands[])
8678 int matching_memory;
8679 rtx src, dst, op, clob;
8681 dst = operands[0];
8682 src = operands[1];
8684 /* If the destination is memory, and we do not have matching source
8685 operands, do things in registers. */
8686 matching_memory = 0;
8687 if (GET_CODE (dst) == MEM)
8689 if (rtx_equal_p (dst, src))
8690 matching_memory = 1;
8691 else
8692 dst = gen_reg_rtx (mode);
8695 /* When source operand is memory, destination must match. */
8696 if (!matching_memory && GET_CODE (src) == MEM)
8697 src = force_reg (mode, src);
8699 /* If optimizing, copy to regs to improve CSE */
8700 if (optimize && ! no_new_pseudos)
8702 if (GET_CODE (dst) == MEM)
8703 dst = gen_reg_rtx (mode);
8704 if (GET_CODE (src) == MEM)
8705 src = force_reg (mode, src);
8708 /* Emit the instruction. */
8710 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8711 if (reload_in_progress || code == NOT)
8713 /* Reload doesn't know about the flags register, and doesn't know that
8714 it doesn't want to clobber it. */
8715 if (code != NOT)
8716 abort ();
8717 emit_insn (op);
8719 else
8721 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8722 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8725 /* Fix up the destination if needed. */
8726 if (dst != operands[0])
8727 emit_move_insn (operands[0], dst);
8730 /* Return TRUE or FALSE depending on whether the unary operator meets the
8731 appropriate constraints. */
8734 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8735 enum machine_mode mode ATTRIBUTE_UNUSED,
8736 rtx operands[2] ATTRIBUTE_UNUSED)
8738 /* If one of operands is memory, source and destination must match. */
8739 if ((GET_CODE (operands[0]) == MEM
8740 || GET_CODE (operands[1]) == MEM)
8741 && ! rtx_equal_p (operands[0], operands[1]))
8742 return FALSE;
8743 return TRUE;
8746 /* Return TRUE or FALSE depending on whether the first SET in INSN
8747 has source and destination with matching CC modes, and whether the
8748 CC mode is at least as constrained as REQ_MODE. */
8751 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8753 rtx set;
8754 enum machine_mode set_mode;
8756 set = PATTERN (insn);
8757 if (GET_CODE (set) == PARALLEL)
8758 set = XVECEXP (set, 0, 0);
8759 if (GET_CODE (set) != SET)
8760 abort ();
8761 if (GET_CODE (SET_SRC (set)) != COMPARE)
8762 abort ();
8764 set_mode = GET_MODE (SET_DEST (set));
8765 switch (set_mode)
8767 case CCNOmode:
8768 if (req_mode != CCNOmode
8769 && (req_mode != CCmode
8770 || XEXP (SET_SRC (set), 1) != const0_rtx))
8771 return 0;
8772 break;
8773 case CCmode:
8774 if (req_mode == CCGCmode)
8775 return 0;
8776 /* FALLTHRU */
8777 case CCGCmode:
8778 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8779 return 0;
8780 /* FALLTHRU */
8781 case CCGOCmode:
8782 if (req_mode == CCZmode)
8783 return 0;
8784 /* FALLTHRU */
8785 case CCZmode:
8786 break;
8788 default:
8789 abort ();
8792 return (GET_MODE (SET_SRC (set)) == set_mode);
8795 /* Generate insn patterns to do an integer compare of OPERANDS. */
8797 static rtx
8798 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8800 enum machine_mode cmpmode;
8801 rtx tmp, flags;
8803 cmpmode = SELECT_CC_MODE (code, op0, op1);
8804 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8806 /* This is very simple, but making the interface the same as in the
8807 FP case makes the rest of the code easier. */
8808 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8809 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8811 /* Return the test that should be put into the flags user, i.e.
8812 the bcc, scc, or cmov instruction. */
8813 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8816 /* Figure out whether to use ordered or unordered fp comparisons.
8817 Return the appropriate mode to use. */
8819 enum machine_mode
8820 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8822 /* ??? In order to make all comparisons reversible, we do all comparisons
8823 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8824 all forms of trapping and nontrapping comparisons, we can make inequality
8825 comparisons trapping again, since that results in better code when using
8826 FCOM based compares. */
8827 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8830 enum machine_mode
8831 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8833 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8834 return ix86_fp_compare_mode (code);
8835 switch (code)
8837 /* Only zero flag is needed. */
8838 case EQ: /* ZF=0 */
8839 case NE: /* ZF!=0 */
8840 return CCZmode;
8841 /* Codes needing carry flag. */
8842 case GEU: /* CF=0 */
8843 case GTU: /* CF=0 & ZF=0 */
8844 case LTU: /* CF=1 */
8845 case LEU: /* CF=1 | ZF=1 */
8846 return CCmode;
8847 /* Codes possibly doable only with sign flag when
8848 comparing against zero. */
8849 case GE: /* SF=OF or SF=0 */
8850 case LT: /* SF<>OF or SF=1 */
8851 if (op1 == const0_rtx)
8852 return CCGOCmode;
8853 else
8854 /* For the other cases the carry flag is not required. */
8855 return CCGCmode;
8856 /* Codes doable only with the sign flag when comparing
8857 against zero, but we miss the jump instruction for it,
8858 so we need to use relational tests against overflow,
8859 which thus needs to be zero. */
8860 case GT: /* ZF=0 & SF=OF */
8861 case LE: /* ZF=1 | SF<>OF */
8862 if (op1 == const0_rtx)
8863 return CCNOmode;
8864 else
8865 return CCGCmode;
8866 /* The strcmp pattern does a (use flags), and combine may ask us for the
8867 proper mode. */
8868 case USE:
8869 return CCmode;
8870 default:
8871 abort ();
8875 /* Return the fixed registers used for condition codes. */
8877 static bool
8878 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8880 *p1 = FLAGS_REG;
8881 *p2 = FPSR_REG;
8882 return true;
8885 /* If two condition code modes are compatible, return a condition code
8886 mode which is compatible with both. Otherwise, return
8887 VOIDmode. */
8889 static enum machine_mode
8890 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8892 if (m1 == m2)
8893 return m1;
8895 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8896 return VOIDmode;
8898 if ((m1 == CCGCmode && m2 == CCGOCmode)
8899 || (m1 == CCGOCmode && m2 == CCGCmode))
8900 return CCGCmode;
8902 switch (m1)
8904 default:
8905 abort ();
8907 case CCmode:
8908 case CCGCmode:
8909 case CCGOCmode:
8910 case CCNOmode:
8911 case CCZmode:
8912 switch (m2)
8914 default:
8915 return VOIDmode;
8917 case CCmode:
8918 case CCGCmode:
8919 case CCGOCmode:
8920 case CCNOmode:
8921 case CCZmode:
8922 return CCmode;
8925 case CCFPmode:
8926 case CCFPUmode:
8927 /* These are only compatible with themselves, which we already
8928 checked above. */
8929 return VOIDmode;
8933 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8936 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8938 enum rtx_code swapped_code = swap_condition (code);
8939 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8940 || (ix86_fp_comparison_cost (swapped_code)
8941 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8944 /* Swap, force into registers, or otherwise massage the two operands
8945 to a fp comparison. The operands are updated in place; the new
8946 comparison code is returned. */
8948 static enum rtx_code
8949 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8951 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8952 rtx op0 = *pop0, op1 = *pop1;
8953 enum machine_mode op_mode = GET_MODE (op0);
8954 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8956 /* All of the unordered compare instructions only work on registers.
8957 The same is true of the XFmode compare instructions. The same is
8958 true of the fcomi compare instructions. */
8960 if (!is_sse
8961 && (fpcmp_mode == CCFPUmode
8962 || op_mode == XFmode
8963 || ix86_use_fcomi_compare (code)))
8965 op0 = force_reg (op_mode, op0);
8966 op1 = force_reg (op_mode, op1);
8968 else
8970 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8971 things around if they appear profitable, otherwise force op0
8972 into a register. */
8974 if (standard_80387_constant_p (op0) == 0
8975 || (GET_CODE (op0) == MEM
8976 && ! (standard_80387_constant_p (op1) == 0
8977 || GET_CODE (op1) == MEM)))
8979 rtx tmp;
8980 tmp = op0, op0 = op1, op1 = tmp;
8981 code = swap_condition (code);
8984 if (GET_CODE (op0) != REG)
8985 op0 = force_reg (op_mode, op0);
8987 if (CONSTANT_P (op1))
8989 if (standard_80387_constant_p (op1))
8990 op1 = force_reg (op_mode, op1);
8991 else
8992 op1 = validize_mem (force_const_mem (op_mode, op1));
8996 /* Try to rearrange the comparison to make it cheaper. */
8997 if (ix86_fp_comparison_cost (code)
8998 > ix86_fp_comparison_cost (swap_condition (code))
8999 && (GET_CODE (op1) == REG || !no_new_pseudos))
9001 rtx tmp;
9002 tmp = op0, op0 = op1, op1 = tmp;
9003 code = swap_condition (code);
9004 if (GET_CODE (op0) != REG)
9005 op0 = force_reg (op_mode, op0);
9008 *pop0 = op0;
9009 *pop1 = op1;
9010 return code;
9013 /* Convert the comparison codes we use to represent FP comparisons to the
9014 integer code that will result in a proper branch. Return UNKNOWN if no
9015 such code is available. */
9016 static enum rtx_code
9017 ix86_fp_compare_code_to_integer (enum rtx_code code)
9019 switch (code)
9021 case GT:
9022 return GTU;
9023 case GE:
9024 return GEU;
9025 case ORDERED:
9026 case UNORDERED:
9027 return code;
9028 break;
9029 case UNEQ:
9030 return EQ;
9031 break;
9032 case UNLT:
9033 return LTU;
9034 break;
9035 case UNLE:
9036 return LEU;
9037 break;
9038 case LTGT:
9039 return NE;
9040 break;
9041 default:
9042 return UNKNOWN;
9046 /* Split comparison code CODE into comparisons we can do using branch
9047 instructions. BYPASS_CODE is the comparison code for the branch that
9048 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
9049 is not required, its value is set to NIL.
9050 We never require more than two branches. */
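/* For example (illustrative): with TARGET_IEEE_FP an EQ comparison becomes
   FIRST_CODE = UNEQ guarded by BYPASS_CODE = UNORDERED, i.e. the bypass
   branch skips the equality test whenever the operands are unordered. */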
9051 static void
9052 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9053 enum rtx_code *first_code,
9054 enum rtx_code *second_code)
9056 *first_code = code;
9057 *bypass_code = NIL;
9058 *second_code = NIL;
9060 /* The fcomi comparison sets flags as follows:
9062 cmp ZF PF CF
9063 > 0 0 0
9064 < 0 0 1
9065 = 1 0 0
9066 un 1 1 1 */
9068 switch (code)
9070 case GT: /* GTU - CF=0 & ZF=0 */
9071 case GE: /* GEU - CF=0 */
9072 case ORDERED: /* PF=0 */
9073 case UNORDERED: /* PF=1 */
9074 case UNEQ: /* EQ - ZF=1 */
9075 case UNLT: /* LTU - CF=1 */
9076 case UNLE: /* LEU - CF=1 | ZF=1 */
9077 case LTGT: /* EQ - ZF=0 */
9078 break;
9079 case LT: /* LTU - CF=1 - fails on unordered */
9080 *first_code = UNLT;
9081 *bypass_code = UNORDERED;
9082 break;
9083 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9084 *first_code = UNLE;
9085 *bypass_code = UNORDERED;
9086 break;
9087 case EQ: /* EQ - ZF=1 - fails on unordered */
9088 *first_code = UNEQ;
9089 *bypass_code = UNORDERED;
9090 break;
9091 case NE: /* NE - ZF=0 - fails on unordered */
9092 *first_code = LTGT;
9093 *second_code = UNORDERED;
9094 break;
9095 case UNGE: /* GEU - CF=0 - fails on unordered */
9096 *first_code = GE;
9097 *second_code = UNORDERED;
9098 break;
9099 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9100 *first_code = GT;
9101 *second_code = UNORDERED;
9102 break;
9103 default:
9104 abort ();
9106 if (!TARGET_IEEE_FP)
9108 *second_code = NIL;
9109 *bypass_code = NIL;
9113 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9114 All of the following functions use the number of instructions as a cost metric.
9115 In the future this should be tweaked to compute bytes for optimize_size and to
9116 take into account the performance of various instructions on various CPUs. */
9117 static int
9118 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9120 if (!TARGET_IEEE_FP)
9121 return 4;
9122 /* The cost of code output by ix86_expand_fp_compare. */
9123 switch (code)
9125 case UNLE:
9126 case UNLT:
9127 case LTGT:
9128 case GT:
9129 case GE:
9130 case UNORDERED:
9131 case ORDERED:
9132 case UNEQ:
9133 return 4;
9134 break;
9135 case LT:
9136 case NE:
9137 case EQ:
9138 case UNGE:
9139 return 5;
9140 break;
9141 case LE:
9142 case UNGT:
9143 return 6;
9144 break;
9145 default:
9146 abort ();
9150 /* Return cost of comparison done using fcomi operation.
9151 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9152 static int
9153 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9155 enum rtx_code bypass_code, first_code, second_code;
9156 /* Return arbitrarily high cost when instruction is not supported - this
9157 prevents gcc from using it. */
9158 if (!TARGET_CMOVE)
9159 return 1024;
9160 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9161 return (bypass_code != NIL || second_code != NIL) + 2;
9164 /* Return cost of comparison done using sahf operation.
9165 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9166 static int
9167 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9169 enum rtx_code bypass_code, first_code, second_code;
9170 /* Return arbitrarily high cost when the instruction is not preferred - this
9171 prevents gcc from using it. */
9172 if (!TARGET_USE_SAHF && !optimize_size)
9173 return 1024;
9174 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9175 return (bypass_code != NIL || second_code != NIL) + 3;
9178 /* Compute cost of the comparison done using any method.
9179 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9180 static int
9181 ix86_fp_comparison_cost (enum rtx_code code)
9183 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9184 int min;
9186 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9187 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9189 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9190 if (min > sahf_cost)
9191 min = sahf_cost;
9192 if (min > fcomi_cost)
9193 min = fcomi_cost;
9194 return min;
9197 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9199 static rtx
9200 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9201 rtx *second_test, rtx *bypass_test)
9203 enum machine_mode fpcmp_mode, intcmp_mode;
9204 rtx tmp, tmp2;
9205 int cost = ix86_fp_comparison_cost (code);
9206 enum rtx_code bypass_code, first_code, second_code;
9208 fpcmp_mode = ix86_fp_compare_mode (code);
9209 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9211 if (second_test)
9212 *second_test = NULL_RTX;
9213 if (bypass_test)
9214 *bypass_test = NULL_RTX;
9216 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9218 /* Do fcomi/sahf based test when profitable. */
9219 if ((bypass_code == NIL || bypass_test)
9220 && (second_code == NIL || second_test)
9221 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9223 if (TARGET_CMOVE)
9225 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9226 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9227 tmp);
9228 emit_insn (tmp);
9230 else
9232 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9233 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9234 if (!scratch)
9235 scratch = gen_reg_rtx (HImode);
9236 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9237 emit_insn (gen_x86_sahf_1 (scratch));
9240 /* The FP codes work out to act like unsigned. */
9241 intcmp_mode = fpcmp_mode;
9242 code = first_code;
9243 if (bypass_code != NIL)
9244 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9245 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9246 const0_rtx);
9247 if (second_code != NIL)
9248 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9249 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9250 const0_rtx);
9252 else
9254 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9255 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9256 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9257 if (!scratch)
9258 scratch = gen_reg_rtx (HImode);
9259 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9261 /* In the unordered case, we have to check C2 for NaN's, which
9262 doesn't happen to work out to anything nice combination-wise.
9263 So do some bit twiddling on the value we've got in AH to come
9264 up with an appropriate set of condition codes. */
9266 intcmp_mode = CCNOmode;
9267 switch (code)
9269 case GT:
9270 case UNGT:
9271 if (code == GT || !TARGET_IEEE_FP)
9273 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9274 code = EQ;
9276 else
9278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9279 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9280 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9281 intcmp_mode = CCmode;
9282 code = GEU;
9284 break;
9285 case LT:
9286 case UNLT:
9287 if (code == LT && TARGET_IEEE_FP)
9289 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9290 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9291 intcmp_mode = CCmode;
9292 code = EQ;
9294 else
9296 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9297 code = NE;
9299 break;
9300 case GE:
9301 case UNGE:
9302 if (code == GE || !TARGET_IEEE_FP)
9304 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9305 code = EQ;
9307 else
9309 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9310 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9311 GEN_INT (0x01)));
9312 code = NE;
9314 break;
9315 case LE:
9316 case UNLE:
9317 if (code == LE && TARGET_IEEE_FP)
9319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9320 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9321 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9322 intcmp_mode = CCmode;
9323 code = LTU;
9325 else
9327 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9328 code = NE;
9330 break;
9331 case EQ:
9332 case UNEQ:
9333 if (code == EQ && TARGET_IEEE_FP)
9335 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9336 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9337 intcmp_mode = CCmode;
9338 code = EQ;
9340 else
9342 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9343 code = NE;
9344 break;
9346 break;
9347 case NE:
9348 case LTGT:
9349 if (code == NE && TARGET_IEEE_FP)
9351 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9352 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9353 GEN_INT (0x40)));
9354 code = NE;
9356 else
9358 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9359 code = EQ;
9361 break;
9363 case UNORDERED:
9364 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9365 code = NE;
9366 break;
9367 case ORDERED:
9368 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9369 code = EQ;
9370 break;
9372 default:
9373 abort ();
9377 /* Return the test that should be put into the flags user, i.e.
9378 the bcc, scc, or cmov instruction. */
9379 return gen_rtx_fmt_ee (code, VOIDmode,
9380 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9381 const0_rtx);
9385 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9387 rtx op0, op1, ret;
9388 op0 = ix86_compare_op0;
9389 op1 = ix86_compare_op1;
9391 if (second_test)
9392 *second_test = NULL_RTX;
9393 if (bypass_test)
9394 *bypass_test = NULL_RTX;
9396 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9397 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9398 second_test, bypass_test);
9399 else
9400 ret = ix86_expand_int_compare (code, op0, op1);
9402 return ret;
9405 /* Return true if the CODE will result in a nontrivial jump sequence. */
9406 bool
9407 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9409 enum rtx_code bypass_code, first_code, second_code;
9410 if (!TARGET_CMOVE)
9411 return true;
9412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9413 return bypass_code != NIL || second_code != NIL;
9416 void
9417 ix86_expand_branch (enum rtx_code code, rtx label)
9419 rtx tmp;
9421 switch (GET_MODE (ix86_compare_op0))
9423 case QImode:
9424 case HImode:
9425 case SImode:
9426 simple:
9427 tmp = ix86_expand_compare (code, NULL, NULL);
9428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9429 gen_rtx_LABEL_REF (VOIDmode, label),
9430 pc_rtx);
9431 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9432 return;
9434 case SFmode:
9435 case DFmode:
9436 case XFmode:
9438 rtvec vec;
9439 int use_fcomi;
9440 enum rtx_code bypass_code, first_code, second_code;
9442 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9443 &ix86_compare_op1);
9445 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9447 /* Check whether we will use the natural sequence with one jump. If
9448 so, we can expand the jump early. Otherwise delay expansion by
9449 creating a compound insn so as not to confuse the optimizers. */
9450 if (bypass_code == NIL && second_code == NIL
9451 && TARGET_CMOVE)
9453 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9454 gen_rtx_LABEL_REF (VOIDmode, label),
9455 pc_rtx, NULL_RTX);
9457 else
9459 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9460 ix86_compare_op0, ix86_compare_op1);
9461 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9462 gen_rtx_LABEL_REF (VOIDmode, label),
9463 pc_rtx);
9464 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9466 use_fcomi = ix86_use_fcomi_compare (code);
9467 vec = rtvec_alloc (3 + !use_fcomi);
9468 RTVEC_ELT (vec, 0) = tmp;
9469 RTVEC_ELT (vec, 1)
9470 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9471 RTVEC_ELT (vec, 2)
9472 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9473 if (! use_fcomi)
9474 RTVEC_ELT (vec, 3)
9475 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9477 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9479 return;
9482 case DImode:
9483 if (TARGET_64BIT)
9484 goto simple;
9485 /* Expand DImode branch into multiple compare+branch. */
9487 rtx lo[2], hi[2], label2;
9488 enum rtx_code code1, code2, code3;
9490 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9492 tmp = ix86_compare_op0;
9493 ix86_compare_op0 = ix86_compare_op1;
9494 ix86_compare_op1 = tmp;
9495 code = swap_condition (code);
9497 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9498 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9500 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9501 avoid two branches. This costs one extra insn, so disable when
9502 optimizing for size. */
9504 if ((code == EQ || code == NE)
9505 && (!optimize_size
9506 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9508 rtx xor0, xor1;
9510 xor1 = hi[0];
9511 if (hi[1] != const0_rtx)
9512 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9513 NULL_RTX, 0, OPTAB_WIDEN);
9515 xor0 = lo[0];
9516 if (lo[1] != const0_rtx)
9517 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9518 NULL_RTX, 0, OPTAB_WIDEN);
9520 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9521 NULL_RTX, 0, OPTAB_WIDEN);
9523 ix86_compare_op0 = tmp;
9524 ix86_compare_op1 = const0_rtx;
9525 ix86_expand_branch (code, label);
9526 return;
9529 /* Otherwise, if we are doing a less-than or greater-or-equal-than
9530 comparison, op1 is a constant, and the low word is zero, then we
9531 can just examine the high word. */
9533 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9534 switch (code)
9536 case LT: case LTU: case GE: case GEU:
9537 ix86_compare_op0 = hi[0];
9538 ix86_compare_op1 = hi[1];
9539 ix86_expand_branch (code, label);
9540 return;
9541 default:
9542 break;
9545 /* Otherwise, we need two or three jumps. */
9547 label2 = gen_label_rtx ();
9549 code1 = code;
9550 code2 = swap_condition (code);
9551 code3 = unsigned_condition (code);
9553 switch (code)
9555 case LT: case GT: case LTU: case GTU:
9556 break;
9558 case LE: code1 = LT; code2 = GT; break;
9559 case GE: code1 = GT; code2 = LT; break;
9560 case LEU: code1 = LTU; code2 = GTU; break;
9561 case GEU: code1 = GTU; code2 = LTU; break;
9563 case EQ: code1 = NIL; code2 = NE; break;
9564 case NE: code2 = NIL; break;
9566 default:
9567 abort ();
9571 * a < b =>
9572 * if (hi(a) < hi(b)) goto true;
9573 * if (hi(a) > hi(b)) goto false;
9574 * if (lo(a) < lo(b)) goto true;
9575 * false:
9578 ix86_compare_op0 = hi[0];
9579 ix86_compare_op1 = hi[1];
9581 if (code1 != NIL)
9582 ix86_expand_branch (code1, label);
9583 if (code2 != NIL)
9584 ix86_expand_branch (code2, label2);
9586 ix86_compare_op0 = lo[0];
9587 ix86_compare_op1 = lo[1];
9588 ix86_expand_branch (code3, label);
9590 if (code2 != NIL)
9591 emit_label (label2);
9592 return;
9595 default:
9596 abort ();
9600 /* Split branch based on floating point condition. */
9601 void
9602 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9603 rtx target1, rtx target2, rtx tmp)
9605 rtx second, bypass;
9606 rtx label = NULL_RTX;
9607 rtx condition;
9608 int bypass_probability = -1, second_probability = -1, probability = -1;
9609 rtx i;
9611 if (target2 != pc_rtx)
9613 rtx tmp = target2;
9614 code = reverse_condition_maybe_unordered (code);
9615 target2 = target1;
9616 target1 = tmp;
9619 condition = ix86_expand_fp_compare (code, op1, op2,
9620 tmp, &second, &bypass);
9622 if (split_branch_probability >= 0)
9624 /* Distribute the probabilities across the jumps.
9625 Assume that BYPASS and SECOND always test
9626 for UNORDERED. */
9627 probability = split_branch_probability;
9629 /* A value of 1 is low enough that the probability does not need
9630 to be updated. Later we may run some experiments and see
9631 if unordered values are more frequent in practice. */
9632 if (bypass)
9633 bypass_probability = 1;
9634 if (second)
9635 second_probability = 1;
9637 if (bypass != NULL_RTX)
9639 label = gen_label_rtx ();
9640 i = emit_jump_insn (gen_rtx_SET
9641 (VOIDmode, pc_rtx,
9642 gen_rtx_IF_THEN_ELSE (VOIDmode,
9643 bypass,
9644 gen_rtx_LABEL_REF (VOIDmode,
9645 label),
9646 pc_rtx)));
9647 if (bypass_probability >= 0)
9648 REG_NOTES (i)
9649 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9650 GEN_INT (bypass_probability),
9651 REG_NOTES (i));
9653 i = emit_jump_insn (gen_rtx_SET
9654 (VOIDmode, pc_rtx,
9655 gen_rtx_IF_THEN_ELSE (VOIDmode,
9656 condition, target1, target2)));
9657 if (probability >= 0)
9658 REG_NOTES (i)
9659 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9660 GEN_INT (probability),
9661 REG_NOTES (i));
9662 if (second != NULL_RTX)
9664 i = emit_jump_insn (gen_rtx_SET
9665 (VOIDmode, pc_rtx,
9666 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9667 target2)));
9668 if (second_probability >= 0)
9669 REG_NOTES (i)
9670 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9671 GEN_INT (second_probability),
9672 REG_NOTES (i));
9674 if (label != NULL_RTX)
9675 emit_label (label);
9679 ix86_expand_setcc (enum rtx_code code, rtx dest)
9681 rtx ret, tmp, tmpreg, equiv;
9682 rtx second_test, bypass_test;
9684 if (GET_MODE (ix86_compare_op0) == DImode
9685 && !TARGET_64BIT)
9686 return 0; /* FAIL */
9688 if (GET_MODE (dest) != QImode)
9689 abort ();
9691 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9692 PUT_MODE (ret, QImode);
9694 tmp = dest;
9695 tmpreg = dest;
9697 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9698 if (bypass_test || second_test)
9700 rtx test = second_test;
9701 int bypass = 0;
9702 rtx tmp2 = gen_reg_rtx (QImode);
9703 if (bypass_test)
9705 if (second_test)
9706 abort ();
9707 test = bypass_test;
9708 bypass = 1;
9709 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9711 PUT_MODE (test, QImode);
9712 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9714 if (bypass)
9715 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9716 else
9717 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9720 /* Attach a REG_EQUAL note describing the comparison result. */
9721 equiv = simplify_gen_relational (code, QImode,
9722 GET_MODE (ix86_compare_op0),
9723 ix86_compare_op0, ix86_compare_op1);
9724 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9726 return 1; /* DONE */
9729 /* Expand a comparison setting or clearing the carry flag. Return true when
9730 successful and set *pop to the comparison operation. */
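/* E.g. (illustrative): an unsigned "a < b" already sets the carry flag with
   a plain cmp, while the other codes handled below are first rewritten into
   an LTU or GEU form (or rejected) before the comparison is emitted. */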
9731 static bool
9732 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9734 enum machine_mode mode =
9735 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9737 /* Do not handle DImode compares that go through the special path. Also we
9738 can't deal with FP compares yet. This is possible to add. */
9739 if ((mode == DImode && !TARGET_64BIT))
9740 return false;
9741 if (FLOAT_MODE_P (mode))
9743 rtx second_test = NULL, bypass_test = NULL;
9744 rtx compare_op, compare_seq;
9746 /* Shortcut: the following common codes never translate into carry flag compares. */
9747 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9748 || code == ORDERED || code == UNORDERED)
9749 return false;
9751 /* These comparisons require the zero flag; swap the operands so they won't. */
9752 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9753 && !TARGET_IEEE_FP)
9755 rtx tmp = op0;
9756 op0 = op1;
9757 op1 = tmp;
9758 code = swap_condition (code);
9761 /* Try to expand the comparison and verify that we end up with a carry flag
9762 based comparison. This fails to be true only when we decide to expand the
9763 comparison using arithmetic, which is not a common scenario. */
9764 start_sequence ();
9765 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9766 &second_test, &bypass_test);
9767 compare_seq = get_insns ();
9768 end_sequence ();
9770 if (second_test || bypass_test)
9771 return false;
9772 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9773 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9774 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9775 else
9776 code = GET_CODE (compare_op);
9777 if (code != LTU && code != GEU)
9778 return false;
9779 emit_insn (compare_seq);
9780 *pop = compare_op;
9781 return true;
9783 if (!INTEGRAL_MODE_P (mode))
9784 return false;
9785 switch (code)
9787 case LTU:
9788 case GEU:
9789 break;
9791 /* Convert a==0 into (unsigned)a<1. */
9792 case EQ:
9793 case NE:
9794 if (op1 != const0_rtx)
9795 return false;
9796 op1 = const1_rtx;
9797 code = (code == EQ ? LTU : GEU);
9798 break;
9800 /* Convert a>b into b<a or a>=b+1. */
9801 case GTU:
9802 case LEU:
9803 if (GET_CODE (op1) == CONST_INT)
9805 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9806 /* Bail out on overflow. We could still swap the operands, but that
9807 would force loading of the constant into a register. */
9808 if (op1 == const0_rtx
9809 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9810 return false;
9811 code = (code == GTU ? GEU : LTU);
9813 else
9815 rtx tmp = op1;
9816 op1 = op0;
9817 op0 = tmp;
9818 code = (code == GTU ? LTU : GEU);
9820 break;
9822 /* Convert a>=0 into (unsigned)a<0x80000000. */
9823 case LT:
9824 case GE:
9825 if (mode == DImode || op1 != const0_rtx)
9826 return false;
9827 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9828 code = (code == LT ? GEU : LTU);
9829 break;
9830 case LE:
9831 case GT:
9832 if (mode == DImode || op1 != constm1_rtx)
9833 return false;
9834 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9835 code = (code == LE ? GEU : LTU);
9836 break;
9838 default:
9839 return false;
9841 /* Swapping operands may cause the constant to appear as the first operand. */
9842 if (!nonimmediate_operand (op0, VOIDmode))
9844 if (no_new_pseudos)
9845 return false;
9846 op0 = force_reg (mode, op0);
9848 ix86_compare_op0 = op0;
9849 ix86_compare_op1 = op1;
9850 *pop = ix86_expand_compare (code, NULL, NULL);
9851 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9852 abort ();
9853 return true;
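/* A minimal plain-C sketch (for illustration only, not used by the compiler),
   assuming 32-bit two's complement arithmetic: each pair below is equivalent,
   which is why the switch above can turn EQ/NE, LT/GE and GTU/LEU into the
   LTU/GEU forms that the carry flag models directly.  */
#if 0
static int
carry_flag_rewrites_hold (unsigned int a, unsigned int b)
{
  int ok = 1;
  ok &= (a == 0) == (a < 1);                    /* a==0  ->  (unsigned)a < 1      */
  ok &= ((int) a >= 0) == (a < 0x80000000u);    /* a>=0  ->  (unsigned)a < 2^31   */
  if (b != 0xffffffffu)                         /* the expander bails on overflow */
    ok &= (a > b) == (a >= b + 1);              /* a>b   ->  a >= b+1 (GTU->GEU)  */
  return ok;
}
#endif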
9857 ix86_expand_int_movcc (rtx operands[])
9859 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9860 rtx compare_seq, compare_op;
9861 rtx second_test, bypass_test;
9862 enum machine_mode mode = GET_MODE (operands[0]);
9863 bool sign_bit_compare_p = false;
9865 start_sequence ();
9866 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9867 compare_seq = get_insns ();
9868 end_sequence ();
9870 compare_code = GET_CODE (compare_op);
9872 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9873 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9874 sign_bit_compare_p = true;
9876 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9877 HImode insns, we'd be swallowed in word prefix ops. */
9879 if ((mode != HImode || TARGET_FAST_PREFIX)
9880 && (mode != DImode || TARGET_64BIT)
9881 && GET_CODE (operands[2]) == CONST_INT
9882 && GET_CODE (operands[3]) == CONST_INT)
9884 rtx out = operands[0];
9885 HOST_WIDE_INT ct = INTVAL (operands[2]);
9886 HOST_WIDE_INT cf = INTVAL (operands[3]);
9887 HOST_WIDE_INT diff;
9889 diff = ct - cf;
9890 /* Sign bit compares are better done using shifts than by using
9891 sbb.  */
9892 if (sign_bit_compare_p
9893 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9894 ix86_compare_op1, &compare_op))
9896 /* Detect overlap between destination and compare sources. */
9897 rtx tmp = out;
9899 if (!sign_bit_compare_p)
9901 bool fpcmp = false;
9903 compare_code = GET_CODE (compare_op);
9905 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9906 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9908 fpcmp = true;
9909 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9912 /* To simplify the rest of the code, restrict to the GEU case.  */
9913 if (compare_code == LTU)
9915 HOST_WIDE_INT tmp = ct;
9916 ct = cf;
9917 cf = tmp;
9918 compare_code = reverse_condition (compare_code);
9919 code = reverse_condition (code);
9921 else
9923 if (fpcmp)
9924 PUT_CODE (compare_op,
9925 reverse_condition_maybe_unordered
9926 (GET_CODE (compare_op)));
9927 else
9928 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9930 diff = ct - cf;
9932 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9933 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9934 tmp = gen_reg_rtx (mode);
9936 if (mode == DImode)
9937 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9938 else
9939 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9941 else
9943 if (code == GT || code == GE)
9944 code = reverse_condition (code);
9945 else
9947 HOST_WIDE_INT tmp = ct;
9948 ct = cf;
9949 cf = tmp;
9950 diff = ct - cf;
9952 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9953 ix86_compare_op1, VOIDmode, 0, -1);
9956 if (diff == 1)
9959 * cmpl op0,op1
9960 * sbbl dest,dest
9961 * [addl dest, ct]
9963 * Size 5 - 8.
9965 if (ct)
9966 tmp = expand_simple_binop (mode, PLUS,
9967 tmp, GEN_INT (ct),
9968 copy_rtx (tmp), 1, OPTAB_DIRECT);
9970 else if (cf == -1)
9973 * cmpl op0,op1
9974 * sbbl dest,dest
9975 * orl $ct, dest
9977 * Size 8.
9979 tmp = expand_simple_binop (mode, IOR,
9980 tmp, GEN_INT (ct),
9981 copy_rtx (tmp), 1, OPTAB_DIRECT);
9983 else if (diff == -1 && ct)
9986 * cmpl op0,op1
9987 * sbbl dest,dest
9988 * notl dest
9989 * [addl dest, cf]
9991 * Size 8 - 11.
9993 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9994 if (cf)
9995 tmp = expand_simple_binop (mode, PLUS,
9996 copy_rtx (tmp), GEN_INT (cf),
9997 copy_rtx (tmp), 1, OPTAB_DIRECT);
9999 else
10002 * cmpl op0,op1
10003 * sbbl dest,dest
10004 * [notl dest]
10005 * andl cf - ct, dest
10006 * [addl dest, ct]
10008 * Size 8 - 11.
10011 if (cf == 0)
10013 cf = ct;
10014 ct = 0;
10015 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10018 tmp = expand_simple_binop (mode, AND,
10019 copy_rtx (tmp),
10020 gen_int_mode (cf - ct, mode),
10021 copy_rtx (tmp), 1, OPTAB_DIRECT);
10022 if (ct)
10023 tmp = expand_simple_binop (mode, PLUS,
10024 copy_rtx (tmp), GEN_INT (ct),
10025 copy_rtx (tmp), 1, OPTAB_DIRECT);
10028 if (!rtx_equal_p (tmp, out))
10029 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10031 return 1; /* DONE */
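/* A minimal plain-C sketch (for illustration only, not the expander itself)
   of the arithmetic behind the cmp/sbb/and/add sequences above.  MASK is
   0 or -1, which is exactly what "sbb reg,reg" leaves behind after a compare
   that clears or sets the carry flag.  */
#if 0
static int
select_by_mask (int mask, int a, int b)
{
  /* Yields A when MASK is -1 and B when MASK is 0.  */
  return (mask & (a - b)) + b;
}
#endif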
10034 if (diff < 0)
10036 HOST_WIDE_INT tmp;
10037 tmp = ct, ct = cf, cf = tmp;
10038 diff = -diff;
10039 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10041 /* We may be reversing an unordered compare to a normal compare, which
10042 is not valid in general (we may convert a non-trapping condition
10043 into a trapping one); however, on i386 we currently emit all
10044 comparisons unordered.  */
10045 compare_code = reverse_condition_maybe_unordered (compare_code);
10046 code = reverse_condition_maybe_unordered (code);
10048 else
10050 compare_code = reverse_condition (compare_code);
10051 code = reverse_condition (code);
10055 compare_code = NIL;
10056 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10057 && GET_CODE (ix86_compare_op1) == CONST_INT)
10059 if (ix86_compare_op1 == const0_rtx
10060 && (code == LT || code == GE))
10061 compare_code = code;
10062 else if (ix86_compare_op1 == constm1_rtx)
10064 if (code == LE)
10065 compare_code = LT;
10066 else if (code == GT)
10067 compare_code = GE;
10071 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10072 if (compare_code != NIL
10073 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10074 && (cf == -1 || ct == -1))
10076 /* If lea code below could be used, only optimize
10077 if it results in a 2 insn sequence. */
10079 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10080 || diff == 3 || diff == 5 || diff == 9)
10081 || (compare_code == LT && ct == -1)
10082 || (compare_code == GE && cf == -1))
10085 * notl op1 (if necessary)
10086 * sarl $31, op1
10087 * orl cf, op1
10089 if (ct != -1)
10091 cf = ct;
10092 ct = -1;
10093 code = reverse_condition (code);
10096 out = emit_store_flag (out, code, ix86_compare_op0,
10097 ix86_compare_op1, VOIDmode, 0, -1);
10099 out = expand_simple_binop (mode, IOR,
10100 out, GEN_INT (cf),
10101 out, 1, OPTAB_DIRECT);
10102 if (out != operands[0])
10103 emit_move_insn (operands[0], out);
10105 return 1; /* DONE */
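/* A minimal plain-C sketch (for illustration only) of the notl/sarl/orl idea
   above, assuming the usual arithmetic right shift of negative values.  */
#if 0
static int
neg_one_or_cf (int x, int cf)
{
  int mask = x >> 31;     /* -1 when x < 0, 0 otherwise (sarl $31) */
  return mask | cf;       /* -1 when x < 0, cf otherwise (orl)     */
}
#endif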
10110 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10111 || diff == 3 || diff == 5 || diff == 9)
10112 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10113 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10116 * xorl dest,dest
10117 * cmpl op1,op2
10118 * setcc dest
10119 * lea cf(dest*(ct-cf)),dest
10121 * Size 14.
10123 * This also catches the degenerate setcc-only case.
10126 rtx tmp;
10127 int nops;
10129 out = emit_store_flag (out, code, ix86_compare_op0,
10130 ix86_compare_op1, VOIDmode, 0, 1);
10132 nops = 0;
10133 /* On x86_64 the lea instruction operates on Pmode, so we need
10134 to get the arithmetic done in the proper mode to match.  */
10135 if (diff == 1)
10136 tmp = copy_rtx (out);
10137 else
10139 rtx out1;
10140 out1 = copy_rtx (out);
10141 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10142 nops++;
10143 if (diff & 1)
10145 tmp = gen_rtx_PLUS (mode, tmp, out1);
10146 nops++;
10149 if (cf != 0)
10151 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10152 nops++;
10154 if (!rtx_equal_p (tmp, out))
10156 if (nops == 1)
10157 out = force_operand (tmp, copy_rtx (out));
10158 else
10159 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10161 if (!rtx_equal_p (out, operands[0]))
10162 emit_move_insn (operands[0], copy_rtx (out));
10164 return 1; /* DONE */
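/* A minimal plain-C sketch (for illustration only) of the value the
   xorl/cmpl/setcc/lea sequence computes.  FLAG is the 0/1 setcc result; a
   single lea (or plain add) can form cf + flag * (ct - cf) when ct - cf is
   1, 2, 3, 4, 5, 8 or 9.  */
#if 0
static int
select_by_lea (int flag, int ct, int cf)
{
  return cf + flag * (ct - cf);   /* ct when flag == 1, cf when flag == 0 */
}
#endif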
10168 * General case: Jumpful:
10169 * xorl dest,dest cmpl op1, op2
10170 * cmpl op1, op2 movl ct, dest
10171 * setcc dest jcc 1f
10172 * decl dest movl cf, dest
10173 * andl (cf-ct),dest 1:
10174 * addl ct,dest
10176 * Size 20. Size 14.
10178 * This is reasonably steep, but branch mispredict costs are
10179 * high on modern cpus, so consider failing only if optimizing
10180 * for space.
10183 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10184 && BRANCH_COST >= 2)
10186 if (cf == 0)
10188 cf = ct;
10189 ct = 0;
10190 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10191 /* We may be reversing an unordered compare to a normal compare,
10192 which is not valid in general (we may convert a non-trapping
10193 condition into a trapping one); however, on i386 we currently
10194 emit all comparisons unordered.  */
10195 code = reverse_condition_maybe_unordered (code);
10196 else
10198 code = reverse_condition (code);
10199 if (compare_code != NIL)
10200 compare_code = reverse_condition (compare_code);
10204 if (compare_code != NIL)
10206 /* notl op1 (if needed)
10207 sarl $31, op1
10208 andl (cf-ct), op1
10209 addl ct, op1
10211 For x < 0 (resp. x <= -1) there will be no notl,
10212 so if possible swap the constants to get rid of the
10213 complement.
10214 True/false will be -1/0 while code below (store flag
10215 followed by decrement) is 0/-1, so the constants need
10216 to be exchanged once more. */
10218 if (compare_code == GE || !cf)
10220 code = reverse_condition (code);
10221 compare_code = LT;
10223 else
10225 HOST_WIDE_INT tmp = cf;
10226 cf = ct;
10227 ct = tmp;
10230 out = emit_store_flag (out, code, ix86_compare_op0,
10231 ix86_compare_op1, VOIDmode, 0, -1);
10233 else
10235 out = emit_store_flag (out, code, ix86_compare_op0,
10236 ix86_compare_op1, VOIDmode, 0, 1);
10238 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10239 copy_rtx (out), 1, OPTAB_DIRECT);
10242 out = expand_simple_binop (mode, AND, copy_rtx (out),
10243 gen_int_mode (cf - ct, mode),
10244 copy_rtx (out), 1, OPTAB_DIRECT);
10245 if (ct)
10246 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10247 copy_rtx (out), 1, OPTAB_DIRECT);
10248 if (!rtx_equal_p (out, operands[0]))
10249 emit_move_insn (operands[0], copy_rtx (out));
10251 return 1; /* DONE */
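/* A minimal plain-C sketch (for illustration only) of the branchless select
   in the xorl/cmpl/setcc/decl/andl/addl sequence above.  FLAG is the 0/1
   setcc result; decrementing it gives a 0/-1 mask.  */
#if 0
static int
select_by_setcc (int flag, int ct, int cf)
{
  return ((flag - 1) & (cf - ct)) + ct;   /* ct when flag == 1, cf when flag == 0 */
}
#endif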
10255 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10259 /* Try a few more things with specific constants and a variable.  */
10259 optab op;
10260 rtx var, orig_out, out, tmp;
10262 if (BRANCH_COST <= 2)
10263 return 0; /* FAIL */
10265 /* If one of the two operands is an interesting constant, load a 0/-1
10266 value with the code above and mask the variable in with a logical operation.  */
10268 if (GET_CODE (operands[2]) == CONST_INT)
10270 var = operands[3];
10271 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10272 operands[3] = constm1_rtx, op = and_optab;
10273 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10274 operands[3] = const0_rtx, op = ior_optab;
10275 else
10276 return 0; /* FAIL */
10278 else if (GET_CODE (operands[3]) == CONST_INT)
10280 var = operands[2];
10281 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10282 operands[2] = constm1_rtx, op = and_optab;
10283 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10284 operands[2] = const0_rtx, op = ior_optab;
10285 else
10286 return 0; /* FAIL */
10288 else
10289 return 0; /* FAIL */
10291 orig_out = operands[0];
10292 tmp = gen_reg_rtx (mode);
10293 operands[0] = tmp;
10295 /* Recurse to get the constant loaded. */
10296 if (ix86_expand_int_movcc (operands) == 0)
10297 return 0; /* FAIL */
10299 /* Mask in the interesting variable. */
10300 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10301 OPTAB_WIDEN);
10302 if (!rtx_equal_p (out, orig_out))
10303 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10305 return 1; /* DONE */
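/* A minimal plain-C sketch (for illustration only) of the masking trick used
   above.  The recursive call loads the temporary with 0 or -1; AND-ing (or
   OR-ing) the variable into that mask then yields the conditional value.  */
#if 0
static int
mask_in_variable (int cond, int var)
{
  int mask = cond ? -1 : 0;   /* result of the recursive constant movcc */
  return mask & var;          /* var when cond is true, 0 otherwise     */
}
#endif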
10309 * For comparison with above,
10311 * movl cf,dest
10312 * movl ct,tmp
10313 * cmpl op1,op2
10314 * cmovcc tmp,dest
10316 * Size 15.
10319 if (! nonimmediate_operand (operands[2], mode))
10320 operands[2] = force_reg (mode, operands[2]);
10321 if (! nonimmediate_operand (operands[3], mode))
10322 operands[3] = force_reg (mode, operands[3]);
10324 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10326 rtx tmp = gen_reg_rtx (mode);
10327 emit_move_insn (tmp, operands[3]);
10328 operands[3] = tmp;
10330 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10332 rtx tmp = gen_reg_rtx (mode);
10333 emit_move_insn (tmp, operands[2]);
10334 operands[2] = tmp;
10337 if (! register_operand (operands[2], VOIDmode)
10338 && (mode == QImode
10339 || ! register_operand (operands[3], VOIDmode)))
10340 operands[2] = force_reg (mode, operands[2]);
10342 if (mode == QImode
10343 && ! register_operand (operands[3], VOIDmode))
10344 operands[3] = force_reg (mode, operands[3]);
10346 emit_insn (compare_seq);
10347 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10348 gen_rtx_IF_THEN_ELSE (mode,
10349 compare_op, operands[2],
10350 operands[3])));
10351 if (bypass_test)
10352 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10353 gen_rtx_IF_THEN_ELSE (mode,
10354 bypass_test,
10355 copy_rtx (operands[3]),
10356 copy_rtx (operands[0]))));
10357 if (second_test)
10358 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10359 gen_rtx_IF_THEN_ELSE (mode,
10360 second_test,
10361 copy_rtx (operands[2]),
10362 copy_rtx (operands[0]))));
10364 return 1; /* DONE */
10368 ix86_expand_fp_movcc (rtx operands[])
10370 enum rtx_code code;
10371 rtx tmp;
10372 rtx compare_op, second_test, bypass_test;
10374 /* For SF/DFmode conditional moves based on comparisons
10375 in same mode, we may want to use SSE min/max instructions. */
10376 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10377 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10378 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10379 /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10380 && (!TARGET_IEEE_FP
10381 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10382 /* We may be called from the post-reload splitter. */
10383 && (!REG_P (operands[0])
10384 || SSE_REG_P (operands[0])
10385 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10387 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10388 code = GET_CODE (operands[1]);
10390 /* See if we have (cross) match between comparison operands and
10391 conditional move operands. */
10392 if (rtx_equal_p (operands[2], op1))
10394 rtx tmp = op0;
10395 op0 = op1;
10396 op1 = tmp;
10397 code = reverse_condition_maybe_unordered (code);
10399 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10401 /* Check for min operation. */
10402 if (code == LT || code == UNLE)
10404 if (code == UNLE)
10406 rtx tmp = op0;
10407 op0 = op1;
10408 op1 = tmp;
10410 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10411 if (memory_operand (op0, VOIDmode))
10412 op0 = force_reg (GET_MODE (operands[0]), op0);
10413 if (GET_MODE (operands[0]) == SFmode)
10414 emit_insn (gen_minsf3 (operands[0], op0, op1));
10415 else
10416 emit_insn (gen_mindf3 (operands[0], op0, op1));
10417 return 1;
10419 /* Check for max operation. */
10420 if (code == GT || code == UNGE)
10422 if (code == UNGE)
10424 rtx tmp = op0;
10425 op0 = op1;
10426 op1 = tmp;
10428 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10429 if (memory_operand (op0, VOIDmode))
10430 op0 = force_reg (GET_MODE (operands[0]), op0);
10431 if (GET_MODE (operands[0]) == SFmode)
10432 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10433 else
10434 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10435 return 1;
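/* A minimal plain-C sketch (for illustration only, ignoring the IEEE/NaN
   subtleties that the TARGET_IEEE_FP checks above guard against) of the
   pattern being recognized: a conditional move whose arms match the
   comparison operands is just a min or max, which SSE provides directly
   (minss/maxss, minsd/maxsd).  */
#if 0
static float
min_via_conditional_move (float a, float b)
{
  return a < b ? a : b;   /* the shape gen_minsf3 implements */
}
#endif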
10438 /* Massage the condition into an sse_comparison_operator.  When we are
10439 in non-IEEE mode, try to canonicalize the destination operand
10440 to be first in the comparison - this helps reload avoid extra
10441 moves.  */
10442 if (!sse_comparison_operator (operands[1], VOIDmode)
10443 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10445 rtx tmp = ix86_compare_op0;
10446 ix86_compare_op0 = ix86_compare_op1;
10447 ix86_compare_op1 = tmp;
10448 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10449 VOIDmode, ix86_compare_op0,
10450 ix86_compare_op1);
10452 /* Similarly, try to arrange for the result to be the first operand of the
10453 conditional move.  We also don't support the NE comparison on SSE, so try
10454 to avoid it.  */
10455 if ((rtx_equal_p (operands[0], operands[3])
10456 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10457 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10459 rtx tmp = operands[2];
10460 operands[2] = operands[3];
10461 operands[3] = tmp;
10462 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10463 (GET_CODE (operands[1])),
10464 VOIDmode, ix86_compare_op0,
10465 ix86_compare_op1);
10467 if (GET_MODE (operands[0]) == SFmode)
10468 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10469 operands[2], operands[3],
10470 ix86_compare_op0, ix86_compare_op1));
10471 else
10472 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10473 operands[2], operands[3],
10474 ix86_compare_op0, ix86_compare_op1));
10475 return 1;
10478 /* The floating point conditional move instructions don't directly
10479 support conditions resulting from a signed integer comparison. */
10481 code = GET_CODE (operands[1]);
10482 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10487 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10489 if (second_test != NULL || bypass_test != NULL)
10490 abort ();
10491 tmp = gen_reg_rtx (QImode);
10492 ix86_expand_setcc (code, tmp);
10493 code = NE;
10494 ix86_compare_op0 = tmp;
10495 ix86_compare_op1 = const0_rtx;
10496 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10498 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10500 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10501 emit_move_insn (tmp, operands[3]);
10502 operands[3] = tmp;
10504 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10506 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10507 emit_move_insn (tmp, operands[2]);
10508 operands[2] = tmp;
10511 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10512 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10513 compare_op,
10514 operands[2],
10515 operands[3])));
10516 if (bypass_test)
10517 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10518 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10519 bypass_test,
10520 operands[3],
10521 operands[0])));
10522 if (second_test)
10523 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10524 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10525 second_test,
10526 operands[2],
10527 operands[0])));
10529 return 1;
10532 /* Expand conditional increment or decrement using adc/sbb instructions.
10533 The default case using setcc followed by a conditional move can be
10534 done by generic code.  */
10536 ix86_expand_int_addcc (rtx operands[])
10538 enum rtx_code code = GET_CODE (operands[1]);
10539 rtx compare_op;
10540 rtx val = const0_rtx;
10541 bool fpcmp = false;
10542 enum machine_mode mode = GET_MODE (operands[0]);
10544 if (operands[3] != const1_rtx
10545 && operands[3] != constm1_rtx)
10546 return 0;
10547 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10548 ix86_compare_op1, &compare_op))
10549 return 0;
10550 code = GET_CODE (compare_op);
10552 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10553 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10555 fpcmp = true;
10556 code = ix86_fp_compare_code_to_integer (code);
10559 if (code != LTU)
10561 val = constm1_rtx;
10562 if (fpcmp)
10563 PUT_CODE (compare_op,
10564 reverse_condition_maybe_unordered
10565 (GET_CODE (compare_op)));
10566 else
10567 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10569 PUT_MODE (compare_op, mode);
10571 /* Construct either adc or sbb insn. */
10572 if ((code == LTU) == (operands[3] == constm1_rtx))
10574 switch (GET_MODE (operands[0]))
10576 case QImode:
10577 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10578 break;
10579 case HImode:
10580 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10581 break;
10582 case SImode:
10583 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10584 break;
10585 case DImode:
10586 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10587 break;
10588 default:
10589 abort ();
10592 else
10594 switch (GET_MODE (operands[0]))
10596 case QImode:
10597 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10598 break;
10599 case HImode:
10600 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10601 break;
10602 case SImode:
10603 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10604 break;
10605 case DImode:
10606 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10607 break;
10608 default:
10609 abort ();
10612 return 1; /* DONE */
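/* A minimal plain-C sketch (for illustration only) of what the adc/sbb forms
   constructed above compute: a compare that sets the carry flag followed by
   adc (or sbb) folds a conditional +1 (or -1) into a single instruction.  */
#if 0
static unsigned int
conditional_increment (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);   /* x + 1 when the carry would be set, x otherwise */
}
#endif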
10616 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10617 works for floating point parameters and non-offsettable memories.
10618 For pushes, it returns just stack offsets; the values will be saved
10619 in the right order.  At most three parts are generated.  */
10621 static int
10622 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10624 int size;
10626 if (!TARGET_64BIT)
10627 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10628 else
10629 size = (GET_MODE_SIZE (mode) + 4) / 8;
10631 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10632 abort ();
10633 if (size < 2 || size > 3)
10634 abort ();
10636 /* Optimize constant pool references into immediates.  This is used by fp
10637 moves, which force all constants to memory to allow combining.  */
10638 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10640 rtx tmp = maybe_get_pool_constant (operand);
10641 if (tmp)
10642 operand = tmp;
10645 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10647 /* The only non-offsettable memories we handle are pushes.  */
10648 if (! push_operand (operand, VOIDmode))
10649 abort ();
10651 operand = copy_rtx (operand);
10652 PUT_MODE (operand, Pmode);
10653 parts[0] = parts[1] = parts[2] = operand;
10655 else if (!TARGET_64BIT)
10657 if (mode == DImode)
10658 split_di (&operand, 1, &parts[0], &parts[1]);
10659 else
10661 if (REG_P (operand))
10663 if (!reload_completed)
10664 abort ();
10665 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10666 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10667 if (size == 3)
10668 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10670 else if (offsettable_memref_p (operand))
10672 operand = adjust_address (operand, SImode, 0);
10673 parts[0] = operand;
10674 parts[1] = adjust_address (operand, SImode, 4);
10675 if (size == 3)
10676 parts[2] = adjust_address (operand, SImode, 8);
10678 else if (GET_CODE (operand) == CONST_DOUBLE)
10680 REAL_VALUE_TYPE r;
10681 long l[4];
10683 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10684 switch (mode)
10686 case XFmode:
10687 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10688 parts[2] = gen_int_mode (l[2], SImode);
10689 break;
10690 case DFmode:
10691 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10692 break;
10693 default:
10694 abort ();
10696 parts[1] = gen_int_mode (l[1], SImode);
10697 parts[0] = gen_int_mode (l[0], SImode);
10699 else
10700 abort ();
10703 else
10705 if (mode == TImode)
10706 split_ti (&operand, 1, &parts[0], &parts[1]);
10707 if (mode == XFmode || mode == TFmode)
10709 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10710 if (REG_P (operand))
10712 if (!reload_completed)
10713 abort ();
10714 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10715 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10717 else if (offsettable_memref_p (operand))
10719 operand = adjust_address (operand, DImode, 0);
10720 parts[0] = operand;
10721 parts[1] = adjust_address (operand, upper_mode, 8);
10723 else if (GET_CODE (operand) == CONST_DOUBLE)
10725 REAL_VALUE_TYPE r;
10726 long l[3];
10728 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10729 real_to_target (l, &r, mode);
10730 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10731 if (HOST_BITS_PER_WIDE_INT >= 64)
10732 parts[0]
10733 = gen_int_mode
10734 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10735 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10736 DImode);
10737 else
10738 parts[0] = immed_double_const (l[0], l[1], DImode);
10739 if (upper_mode == SImode)
10740 parts[1] = gen_int_mode (l[2], SImode);
10741 else if (HOST_BITS_PER_WIDE_INT >= 64)
10742 parts[1]
10743 = gen_int_mode
10744 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10745 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10746 DImode);
10747 else
10748 parts[1] = immed_double_const (l[2], l[3], DImode);
10750 else
10751 abort ();
10755 return size;
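/* A minimal plain-C sketch (for illustration only) of the simplest case the
   splitting above covers: a 64-bit value broken into two SImode words on a
   32-bit target.  */
#if 0
static void
split_di_value (unsigned long long value, unsigned int parts[2])
{
  parts[0] = (unsigned int) value;           /* low word  */
  parts[1] = (unsigned int) (value >> 32);   /* high word */
}
#endif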
10758 /* Emit insns to perform a move or push of DI, DF, and XF values.
10759 All required insns are emitted directly by this function.  Operands 2-4
10760 receive the destination parts in the correct order; operands 5-7
10761 receive the source parts.  */
10763 void
10764 ix86_split_long_move (rtx operands[])
10766 rtx part[2][3];
10767 int nparts;
10768 int push = 0;
10769 int collisions = 0;
10770 enum machine_mode mode = GET_MODE (operands[0]);
10772 /* The DFmode expanders may ask us to move a double.
10773 For a 64-bit target this is a single move.  By hiding that fact
10774 here we simplify the i386.md splitters.  */
10775 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10777 /* Optimize constant pool reference to immediates. This is used by
10778 fp moves, that force all constants to memory to allow combining. */
10780 if (GET_CODE (operands[1]) == MEM
10781 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10782 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10783 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10784 if (push_operand (operands[0], VOIDmode))
10786 operands[0] = copy_rtx (operands[0]);
10787 PUT_MODE (operands[0], Pmode);
10789 else
10790 operands[0] = gen_lowpart (DImode, operands[0]);
10791 operands[1] = gen_lowpart (DImode, operands[1]);
10792 emit_move_insn (operands[0], operands[1]);
10793 return;
10796 /* The only non-offsettable memory we handle is push. */
10797 if (push_operand (operands[0], VOIDmode))
10798 push = 1;
10799 else if (GET_CODE (operands[0]) == MEM
10800 && ! offsettable_memref_p (operands[0]))
10801 abort ();
10803 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10804 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10806 /* When emitting a push, take care of source operands on the stack.  */
10807 if (push && GET_CODE (operands[1]) == MEM
10808 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10810 if (nparts == 3)
10811 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10812 XEXP (part[1][2], 0));
10813 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10814 XEXP (part[1][1], 0));
10817 /* We need to do the copy in the right order in case an address register
10818 of the source overlaps the destination.  */
10819 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10821 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10822 collisions++;
10823 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10824 collisions++;
10825 if (nparts == 3
10826 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10827 collisions++;
10829 /* Collision in the middle part can be handled by reordering. */
10830 if (collisions == 1 && nparts == 3
10831 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10833 rtx tmp;
10834 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10835 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10838 /* If there are more collisions, we can't handle them by reordering.
10839 Do an lea to the last part and use only one colliding move.  */
10840 else if (collisions > 1)
10842 rtx base;
10844 collisions = 1;
10846 base = part[0][nparts - 1];
10848 /* Handle the case when the last part isn't valid for lea.
10849 Happens in 64-bit mode storing the 12-byte XFmode. */
10850 if (GET_MODE (base) != Pmode)
10851 base = gen_rtx_REG (Pmode, REGNO (base));
10853 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10854 part[1][0] = replace_equiv_address (part[1][0], base);
10855 part[1][1] = replace_equiv_address (part[1][1],
10856 plus_constant (base, UNITS_PER_WORD));
10857 if (nparts == 3)
10858 part[1][2] = replace_equiv_address (part[1][2],
10859 plus_constant (base, 8));
10863 if (push)
10865 if (!TARGET_64BIT)
10867 if (nparts == 3)
10869 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10870 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10871 emit_move_insn (part[0][2], part[1][2]);
10874 else
10876 /* In 64-bit mode we don't have a 32-bit push available.  In case this is
10877 a register, it is OK - we will just use the larger counterpart.  We also
10878 retype memories - these come from an attempt to avoid the REX prefix on
10879 moving the second half of a TFmode value.  */
10880 if (GET_MODE (part[1][1]) == SImode)
10882 if (GET_CODE (part[1][1]) == MEM)
10883 part[1][1] = adjust_address (part[1][1], DImode, 0);
10884 else if (REG_P (part[1][1]))
10885 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10886 else
10887 abort ();
10888 if (GET_MODE (part[1][0]) == SImode)
10889 part[1][0] = part[1][1];
10892 emit_move_insn (part[0][1], part[1][1]);
10893 emit_move_insn (part[0][0], part[1][0]);
10894 return;
10897 /* Choose the correct order so as not to overwrite the source before it is copied.  */
10898 if ((REG_P (part[0][0])
10899 && REG_P (part[1][1])
10900 && (REGNO (part[0][0]) == REGNO (part[1][1])
10901 || (nparts == 3
10902 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10903 || (collisions > 0
10904 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10906 if (nparts == 3)
10908 operands[2] = part[0][2];
10909 operands[3] = part[0][1];
10910 operands[4] = part[0][0];
10911 operands[5] = part[1][2];
10912 operands[6] = part[1][1];
10913 operands[7] = part[1][0];
10915 else
10917 operands[2] = part[0][1];
10918 operands[3] = part[0][0];
10919 operands[5] = part[1][1];
10920 operands[6] = part[1][0];
10923 else
10925 if (nparts == 3)
10927 operands[2] = part[0][0];
10928 operands[3] = part[0][1];
10929 operands[4] = part[0][2];
10930 operands[5] = part[1][0];
10931 operands[6] = part[1][1];
10932 operands[7] = part[1][2];
10934 else
10936 operands[2] = part[0][0];
10937 operands[3] = part[0][1];
10938 operands[5] = part[1][0];
10939 operands[6] = part[1][1];
10942 emit_move_insn (operands[2], operands[5]);
10943 emit_move_insn (operands[3], operands[6]);
10944 if (nparts == 3)
10945 emit_move_insn (operands[4], operands[7]);
10947 return;
10950 void
10951 ix86_split_ashldi (rtx *operands, rtx scratch)
10953 rtx low[2], high[2];
10954 int count;
10956 if (GET_CODE (operands[2]) == CONST_INT)
10958 split_di (operands, 2, low, high);
10959 count = INTVAL (operands[2]) & 63;
10961 if (count >= 32)
10963 emit_move_insn (high[0], low[1]);
10964 emit_move_insn (low[0], const0_rtx);
10966 if (count > 32)
10967 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10969 else
10971 if (!rtx_equal_p (operands[0], operands[1]))
10972 emit_move_insn (operands[0], operands[1]);
10973 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10974 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10977 else
10979 if (!rtx_equal_p (operands[0], operands[1]))
10980 emit_move_insn (operands[0], operands[1]);
10982 split_di (operands, 1, low, high);
10984 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10985 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10987 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10989 if (! no_new_pseudos)
10990 scratch = force_reg (SImode, const0_rtx);
10991 else
10992 emit_move_insn (scratch, const0_rtx);
10994 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10995 scratch));
10997 else
10998 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
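/* A minimal plain-C sketch (for illustration only) of the constant-count case
   above for a 64-bit left shift done in 32-bit halves.  For counts of 32 or
   more the low word moves into the high word; otherwise the high word picks
   up the bits shifted out of the low word, which is what shld does.  */
#if 0
static void
shl64_by_parts (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));
      *lo <<= count;
    }
}
#endif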
11002 void
11003 ix86_split_ashrdi (rtx *operands, rtx scratch)
11005 rtx low[2], high[2];
11006 int count;
11008 if (GET_CODE (operands[2]) == CONST_INT)
11010 split_di (operands, 2, low, high);
11011 count = INTVAL (operands[2]) & 63;
11013 if (count == 63)
11015 emit_move_insn (high[0], high[1]);
11016 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11017 emit_move_insn (low[0], high[0]);
11020 else if (count >= 32)
11022 emit_move_insn (low[0], high[1]);
11024 if (! reload_completed)
11025 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
11026 else
11028 emit_move_insn (high[0], low[0]);
11029 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11032 if (count > 32)
11033 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11035 else
11037 if (!rtx_equal_p (operands[0], operands[1]))
11038 emit_move_insn (operands[0], operands[1]);
11039 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11040 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11043 else
11045 if (!rtx_equal_p (operands[0], operands[1]))
11046 emit_move_insn (operands[0], operands[1]);
11048 split_di (operands, 1, low, high);
11050 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11051 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11053 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11055 if (! no_new_pseudos)
11056 scratch = gen_reg_rtx (SImode);
11057 emit_move_insn (scratch, high[0]);
11058 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11059 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11060 scratch));
11062 else
11063 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11067 void
11068 ix86_split_lshrdi (rtx *operands, rtx scratch)
11070 rtx low[2], high[2];
11071 int count;
11073 if (GET_CODE (operands[2]) == CONST_INT)
11075 split_di (operands, 2, low, high);
11076 count = INTVAL (operands[2]) & 63;
11078 if (count >= 32)
11080 emit_move_insn (low[0], high[1]);
11081 emit_move_insn (high[0], const0_rtx);
11083 if (count > 32)
11084 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11086 else
11088 if (!rtx_equal_p (operands[0], operands[1]))
11089 emit_move_insn (operands[0], operands[1]);
11090 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11091 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11094 else
11096 if (!rtx_equal_p (operands[0], operands[1]))
11097 emit_move_insn (operands[0], operands[1]);
11099 split_di (operands, 1, low, high);
11101 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11102 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11104 /* Heh. By reversing the arguments, we can reuse this pattern. */
11105 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11107 if (! no_new_pseudos)
11108 scratch = force_reg (SImode, const0_rtx);
11109 else
11110 emit_move_insn (scratch, const0_rtx);
11112 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11113 scratch));
11115 else
11116 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11120 /* Helper function for the string operations below.  Test VARIABLE for whether
11121 it is aligned to VALUE bytes.  If so, jump to the returned label.  */
11122 static rtx
11123 ix86_expand_aligntest (rtx variable, int value)
11125 rtx label = gen_label_rtx ();
11126 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11127 if (GET_MODE (variable) == DImode)
11128 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11129 else
11130 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11131 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11132 1, label);
11133 return label;
11136 /* Decrease COUNTREG by VALUE.  */
11137 static void
11138 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11140 if (GET_MODE (countreg) == DImode)
11141 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11142 else
11143 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11146 /* Zero-extend the possibly SImode EXP into a Pmode register.  */
11148 ix86_zero_extend_to_Pmode (rtx exp)
11150 rtx r;
11151 if (GET_MODE (exp) == VOIDmode)
11152 return force_reg (Pmode, exp);
11153 if (GET_MODE (exp) == Pmode)
11154 return copy_to_mode_reg (Pmode, exp);
11155 r = gen_reg_rtx (Pmode);
11156 emit_insn (gen_zero_extendsidi2 (r, exp));
11157 return r;
11160 /* Expand a string move (memcpy) operation.  Use i386 string operations when
11161 profitable.  ix86_expand_clrmem contains similar code.  */
11163 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11165 rtx srcreg, destreg, countreg, srcexp, destexp;
11166 enum machine_mode counter_mode;
11167 HOST_WIDE_INT align = 0;
11168 unsigned HOST_WIDE_INT count = 0;
11170 if (GET_CODE (align_exp) == CONST_INT)
11171 align = INTVAL (align_exp);
11173 /* Can't use any of this if the user has appropriated esi or edi. */
11174 if (global_regs[4] || global_regs[5])
11175 return 0;
11177 /* This simple hack avoids all inlining code and simplifies code below. */
11178 if (!TARGET_ALIGN_STRINGOPS)
11179 align = 64;
11181 if (GET_CODE (count_exp) == CONST_INT)
11183 count = INTVAL (count_exp);
11184 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11185 return 0;
11188 /* Figure out the proper mode for the counter.  For 32 bits it is always SImode;
11189 for 64 bits use SImode when possible, otherwise DImode.
11190 COUNT is the number of bytes to copy when known at compile time.  */
11191 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11192 || x86_64_zero_extended_value (count_exp))
11193 counter_mode = SImode;
11194 else
11195 counter_mode = DImode;
11197 if (counter_mode != SImode && counter_mode != DImode)
11198 abort ();
11200 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11201 if (destreg != XEXP (dst, 0))
11202 dst = replace_equiv_address_nv (dst, destreg);
11203 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11204 if (srcreg != XEXP (src, 0))
11205 src = replace_equiv_address_nv (src, srcreg);
11207 /* When optimizing for size, emit a simple rep ; movsb instruction for
11208 counts not divisible by 4.  */
11210 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11212 emit_insn (gen_cld ());
11213 countreg = ix86_zero_extend_to_Pmode (count_exp);
11214 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11215 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11216 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11217 destexp, srcexp));
11220 /* For constant aligned (or small unaligned) copies use rep movsl
11221 followed by code copying the rest. For PentiumPro ensure 8 byte
11222 alignment to allow rep movsl acceleration. */
11224 else if (count != 0
11225 && (align >= 8
11226 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11227 || optimize_size || count < (unsigned int) 64))
11229 unsigned HOST_WIDE_INT offset = 0;
11230 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11231 rtx srcmem, dstmem;
11233 emit_insn (gen_cld ());
11234 if (count & ~(size - 1))
11236 countreg = copy_to_mode_reg (counter_mode,
11237 GEN_INT ((count >> (size == 4 ? 2 : 3))
11238 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11239 countreg = ix86_zero_extend_to_Pmode (countreg);
11241 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11242 GEN_INT (size == 4 ? 2 : 3));
11243 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11244 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11246 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11247 countreg, destexp, srcexp));
11248 offset = count & ~(size - 1);
11250 if (size == 8 && (count & 0x04))
11252 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11253 offset);
11254 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11255 offset);
11256 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11257 offset += 4;
11259 if (count & 0x02)
11261 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11262 offset);
11263 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11264 offset);
11265 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11266 offset += 2;
11268 if (count & 0x01)
11270 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11271 offset);
11272 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11273 offset);
11274 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
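/* A minimal plain-C sketch (for illustration only, using plain byte pointers
   instead of the string insns) of the shape of the copy emitted above: a
   word-sized bulk copy standing in for rep movsl/movsq, followed by at most
   one 2-byte and one 1-byte tail move.  */
#if 0
static void
copy_small_constant (unsigned char *dst, const unsigned char *src,
		     unsigned long count)
{
  unsigned long i = 0, bulk = count & ~3ul;
  for (; i < bulk; i += 4)          /* stands in for rep movsl */
    {
      dst[i] = src[i];
      dst[i + 1] = src[i + 1];
      dst[i + 2] = src[i + 2];
      dst[i + 3] = src[i + 3];
    }
  if (count & 2)                    /* movsw tail */
    {
      dst[i] = src[i];
      dst[i + 1] = src[i + 1];
      i += 2;
    }
  if (count & 1)                    /* movsb tail */
    dst[i] = src[i];
}
#endif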
11277 /* The generic code based on the glibc implementation:
11278 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11279 allowing accelerated copying there)
11280 - copy the data using rep movsl
11281 - copy the rest. */
11282 else
11284 rtx countreg2;
11285 rtx label = NULL;
11286 rtx srcmem, dstmem;
11287 int desired_alignment = (TARGET_PENTIUMPRO
11288 && (count == 0 || count >= (unsigned int) 260)
11289 ? 8 : UNITS_PER_WORD);
11290 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11291 dst = change_address (dst, BLKmode, destreg);
11292 src = change_address (src, BLKmode, srcreg);
11294 /* In case we don't know anything about the alignment, default to the
11295 library version, since it is usually equally fast and results in
11296 shorter code.
11298 Also emit the call when we know that the count is large and the call
11299 overhead will not be important.  */
11300 if (!TARGET_INLINE_ALL_STRINGOPS
11301 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11302 return 0;
11304 if (TARGET_SINGLE_STRINGOP)
11305 emit_insn (gen_cld ());
11307 countreg2 = gen_reg_rtx (Pmode);
11308 countreg = copy_to_mode_reg (counter_mode, count_exp);
11310 /* We don't use loops to align destination and to copy parts smaller
11311 than 4 bytes, because gcc is able to optimize such code better (in
11312 the case the destination or the count really is aligned, gcc is often
11313 able to predict the branches) and also it is friendlier to the
11314 hardware branch prediction.
11316 Using loops is beneficial for the generic case, because we can
11317 handle small counts using the loops. Many CPUs (such as Athlon)
11318 have large REP prefix setup costs.
11320 This is quite costly. Maybe we can revisit this decision later or
11321 add some customizability to this code. */
11323 if (count == 0 && align < desired_alignment)
11325 label = gen_label_rtx ();
11326 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11327 LEU, 0, counter_mode, 1, label);
11329 if (align <= 1)
11331 rtx label = ix86_expand_aligntest (destreg, 1);
11332 srcmem = change_address (src, QImode, srcreg);
11333 dstmem = change_address (dst, QImode, destreg);
11334 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11335 ix86_adjust_counter (countreg, 1);
11336 emit_label (label);
11337 LABEL_NUSES (label) = 1;
11339 if (align <= 2)
11341 rtx label = ix86_expand_aligntest (destreg, 2);
11342 srcmem = change_address (src, HImode, srcreg);
11343 dstmem = change_address (dst, HImode, destreg);
11344 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11345 ix86_adjust_counter (countreg, 2);
11346 emit_label (label);
11347 LABEL_NUSES (label) = 1;
11349 if (align <= 4 && desired_alignment > 4)
11351 rtx label = ix86_expand_aligntest (destreg, 4);
11352 srcmem = change_address (src, SImode, srcreg);
11353 dstmem = change_address (dst, SImode, destreg);
11354 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11355 ix86_adjust_counter (countreg, 4);
11356 emit_label (label);
11357 LABEL_NUSES (label) = 1;
11360 if (label && desired_alignment > 4 && !TARGET_64BIT)
11362 emit_label (label);
11363 LABEL_NUSES (label) = 1;
11364 label = NULL_RTX;
11366 if (!TARGET_SINGLE_STRINGOP)
11367 emit_insn (gen_cld ());
11368 if (TARGET_64BIT)
11370 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11371 GEN_INT (3)));
11372 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11374 else
11376 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11377 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11379 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11380 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11381 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11382 countreg2, destexp, srcexp));
11384 if (label)
11386 emit_label (label);
11387 LABEL_NUSES (label) = 1;
11389 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11391 srcmem = change_address (src, SImode, srcreg);
11392 dstmem = change_address (dst, SImode, destreg);
11393 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11395 if ((align <= 4 || count == 0) && TARGET_64BIT)
11397 rtx label = ix86_expand_aligntest (countreg, 4);
11398 srcmem = change_address (src, SImode, srcreg);
11399 dstmem = change_address (dst, SImode, destreg);
11400 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11401 emit_label (label);
11402 LABEL_NUSES (label) = 1;
11404 if (align > 2 && count != 0 && (count & 2))
11406 srcmem = change_address (src, HImode, srcreg);
11407 dstmem = change_address (dst, HImode, destreg);
11408 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11410 if (align <= 2 || count == 0)
11412 rtx label = ix86_expand_aligntest (countreg, 2);
11413 srcmem = change_address (src, HImode, srcreg);
11414 dstmem = change_address (dst, HImode, destreg);
11415 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11416 emit_label (label);
11417 LABEL_NUSES (label) = 1;
11419 if (align > 1 && count != 0 && (count & 1))
11421 srcmem = change_address (src, QImode, srcreg);
11422 dstmem = change_address (dst, QImode, destreg);
11423 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11425 if (align <= 1 || count == 0)
11427 rtx label = ix86_expand_aligntest (countreg, 1);
11428 srcmem = change_address (src, QImode, srcreg);
11429 dstmem = change_address (dst, QImode, destreg);
11430 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11431 emit_label (label);
11432 LABEL_NUSES (label) = 1;
11436 return 1;
11439 /* Expand a string clear operation (bzero).  Use i386 string operations when
11440 profitable.  ix86_expand_movmem contains similar code.  */
11442 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11444 rtx destreg, zeroreg, countreg, destexp;
11445 enum machine_mode counter_mode;
11446 HOST_WIDE_INT align = 0;
11447 unsigned HOST_WIDE_INT count = 0;
11449 if (GET_CODE (align_exp) == CONST_INT)
11450 align = INTVAL (align_exp);
11452 /* Can't use any of this if the user has appropriated esi. */
11453 if (global_regs[4])
11454 return 0;
11456 /* This simple hack avoids all inlining code and simplifies code below. */
11457 if (!TARGET_ALIGN_STRINGOPS)
11458 align = 32;
11460 if (GET_CODE (count_exp) == CONST_INT)
11462 count = INTVAL (count_exp);
11463 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11464 return 0;
11466 /* Figure out the proper mode for the counter.  For 32 bits it is always SImode;
11467 for 64 bits use SImode when possible, otherwise DImode.
11468 COUNT is the number of bytes to clear when known at compile time.  */
11469 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11470 || x86_64_zero_extended_value (count_exp))
11471 counter_mode = SImode;
11472 else
11473 counter_mode = DImode;
11475 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11476 if (destreg != XEXP (dst, 0))
11477 dst = replace_equiv_address_nv (dst, destreg);
11479 emit_insn (gen_cld ());
11481 /* When optimizing for size, emit a simple rep ; stosb instruction for
11482 counts not divisible by 4.  */
11484 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11486 countreg = ix86_zero_extend_to_Pmode (count_exp);
11487 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11488 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11489 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11491 else if (count != 0
11492 && (align >= 8
11493 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11494 || optimize_size || count < (unsigned int) 64))
11496 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11497 unsigned HOST_WIDE_INT offset = 0;
11499 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11500 if (count & ~(size - 1))
11502 countreg = copy_to_mode_reg (counter_mode,
11503 GEN_INT ((count >> (size == 4 ? 2 : 3))
11504 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11505 countreg = ix86_zero_extend_to_Pmode (countreg);
11506 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11507 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11508 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11509 offset = count & ~(size - 1);
11511 if (size == 8 && (count & 0x04))
11513 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11514 offset);
11515 emit_insn (gen_strset (destreg, mem,
11516 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11517 offset += 4;
11519 if (count & 0x02)
11521 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11522 offset);
11523 emit_insn (gen_strset (destreg, mem,
11524 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11525 offset += 2;
11527 if (count & 0x01)
11529 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11530 offset);
11531 emit_insn (gen_strset (destreg, mem,
11532 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11535 else
11537 rtx countreg2;
11538 rtx label = NULL;
11539 /* Compute desired alignment of the string operation. */
11540 int desired_alignment = (TARGET_PENTIUMPRO
11541 && (count == 0 || count >= (unsigned int) 260)
11542 ? 8 : UNITS_PER_WORD);
11544 /* In case we don't know anything about the alignment, default to the
11545 library version, since it is usually equally fast and results in
11546 shorter code.
11548 Also emit the call when we know that the count is large and the call
11549 overhead will not be important.  */
11550 if (!TARGET_INLINE_ALL_STRINGOPS
11551 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11552 return 0;
11554 if (TARGET_SINGLE_STRINGOP)
11555 emit_insn (gen_cld ());
11557 countreg2 = gen_reg_rtx (Pmode);
11558 countreg = copy_to_mode_reg (counter_mode, count_exp);
11559 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11560 /* Get rid of MEM_OFFSET, it won't be accurate. */
11561 dst = change_address (dst, BLKmode, destreg);
11563 if (count == 0 && align < desired_alignment)
11565 label = gen_label_rtx ();
11566 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11567 LEU, 0, counter_mode, 1, label);
11569 if (align <= 1)
11571 rtx label = ix86_expand_aligntest (destreg, 1);
11572 emit_insn (gen_strset (destreg, dst,
11573 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11574 ix86_adjust_counter (countreg, 1);
11575 emit_label (label);
11576 LABEL_NUSES (label) = 1;
11578 if (align <= 2)
11580 rtx label = ix86_expand_aligntest (destreg, 2);
11581 emit_insn (gen_strset (destreg, dst,
11582 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11583 ix86_adjust_counter (countreg, 2);
11584 emit_label (label);
11585 LABEL_NUSES (label) = 1;
11587 if (align <= 4 && desired_alignment > 4)
11589 rtx label = ix86_expand_aligntest (destreg, 4);
11590 emit_insn (gen_strset (destreg, dst,
11591 (TARGET_64BIT
11592 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11593 : zeroreg)));
11594 ix86_adjust_counter (countreg, 4);
11595 emit_label (label);
11596 LABEL_NUSES (label) = 1;
11599 if (label && desired_alignment > 4 && !TARGET_64BIT)
11601 emit_label (label);
11602 LABEL_NUSES (label) = 1;
11603 label = NULL_RTX;
11606 if (!TARGET_SINGLE_STRINGOP)
11607 emit_insn (gen_cld ());
11608 if (TARGET_64BIT)
11610 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11611 GEN_INT (3)));
11612 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11614 else
11616 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11617 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11619 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11620 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11622 if (label)
11624 emit_label (label);
11625 LABEL_NUSES (label) = 1;
11628 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11629 emit_insn (gen_strset (destreg, dst,
11630 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11631 if (TARGET_64BIT && (align <= 4 || count == 0))
11633 rtx label = ix86_expand_aligntest (countreg, 4);
11634 emit_insn (gen_strset (destreg, dst,
11635 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11636 emit_label (label);
11637 LABEL_NUSES (label) = 1;
11639 if (align > 2 && count != 0 && (count & 2))
11640 emit_insn (gen_strset (destreg, dst,
11641 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11642 if (align <= 2 || count == 0)
11644 rtx label = ix86_expand_aligntest (countreg, 2);
11645 emit_insn (gen_strset (destreg, dst,
11646 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11647 emit_label (label);
11648 LABEL_NUSES (label) = 1;
11650 if (align > 1 && count != 0 && (count & 1))
11651 emit_insn (gen_strset (destreg, dst,
11652 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11653 if (align <= 1 || count == 0)
11655 rtx label = ix86_expand_aligntest (countreg, 1);
11656 emit_insn (gen_strset (destreg, dst,
11657 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11658 emit_label (label);
11659 LABEL_NUSES (label) = 1;
11662 return 1;
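/* A minimal plain-C sketch (for illustration only) of the overall shape of
   the generic clear expansion above: byte stores until the destination is
   word aligned (the aligntest branches), a bulk word clear standing in for
   rep stosl, then the remaining tail bytes.  */
#if 0
static void
clear_with_alignment_prologue (unsigned char *dst, unsigned long count)
{
  while (((unsigned long) dst & 3) && count)   /* align the destination */
    {
      *dst++ = 0;
      count--;
    }
  for (; count >= 4; count -= 4, dst += 4)     /* stands in for rep stosl */
    dst[0] = dst[1] = dst[2] = dst[3] = 0;
  while (count--)                              /* tail bytes */
    *dst++ = 0;
}
#endif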
11665 /* Expand strlen. */
11667 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11669 rtx addr, scratch1, scratch2, scratch3, scratch4;
11671 /* The generic case of the strlen expander is long.  Avoid its
11672 expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
11674 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11675 && !TARGET_INLINE_ALL_STRINGOPS
11676 && !optimize_size
11677 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11678 return 0;
11680 addr = force_reg (Pmode, XEXP (src, 0));
11681 scratch1 = gen_reg_rtx (Pmode);
11683 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11684 && !optimize_size)
11686 /* Well it seems that some optimizer does not combine a call like
11687 foo(strlen(bar), strlen(bar));
11688 when the move and the subtraction are done here.  It does calculate
11689 the length just once when these instructions are done inside of
11690 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11691 often used and I use one fewer register for the lifetime of
11692 output_strlen_unroll() this is better. */
11694 emit_move_insn (out, addr);
11696 ix86_expand_strlensi_unroll_1 (out, src, align);
11698 /* strlensi_unroll_1 returns the address of the zero at the end of
11699 the string, like memchr(), so compute the length by subtracting
11700 the start address. */
11701 if (TARGET_64BIT)
11702 emit_insn (gen_subdi3 (out, out, addr));
11703 else
11704 emit_insn (gen_subsi3 (out, out, addr));
11706 else
11708 rtx unspec;
11709 scratch2 = gen_reg_rtx (Pmode);
11710 scratch3 = gen_reg_rtx (Pmode);
11711 scratch4 = force_reg (Pmode, constm1_rtx);
11713 emit_move_insn (scratch3, addr);
11714 eoschar = force_reg (QImode, eoschar);
11716 emit_insn (gen_cld ());
11717 src = replace_equiv_address_nv (src, scratch3);
11719 /* If .md starts supporting :P, this can be done in .md. */
11720 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11721 scratch4), UNSPEC_SCAS);
11722 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11723 if (TARGET_64BIT)
11725 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11726 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11728 else
11730 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11731 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11734 return 1;
11737 /* Expand the appropriate insns for doing strlen if not just doing
11738 repnz; scasb
11740 out = result, initialized with the start address
11741 align_rtx = alignment of the address.
11742 scratch = scratch register, initialized with the start address when
11743 not aligned, otherwise undefined
11745 This is just the body. It needs the initializations mentioned above and
11746 some address computing at the end. These things are done in i386.md. */
11748 static void
11749 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11751 int align;
11752 rtx tmp;
11753 rtx align_2_label = NULL_RTX;
11754 rtx align_3_label = NULL_RTX;
11755 rtx align_4_label = gen_label_rtx ();
11756 rtx end_0_label = gen_label_rtx ();
11757 rtx mem;
11758 rtx tmpreg = gen_reg_rtx (SImode);
11759 rtx scratch = gen_reg_rtx (SImode);
11760 rtx cmp;
11762 align = 0;
11763 if (GET_CODE (align_rtx) == CONST_INT)
11764 align = INTVAL (align_rtx);
11766 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11768 /* Is there a known alignment and is it less than 4? */
11769 if (align < 4)
11771 rtx scratch1 = gen_reg_rtx (Pmode);
11772 emit_move_insn (scratch1, out);
11773 /* Is there a known alignment and is it not 2? */
11774 if (align != 2)
11776 align_3_label = gen_label_rtx (); /* Label for address == 3 (mod 4) */
11777 align_2_label = gen_label_rtx (); /* Label for address == 2 (mod 4) */
11779 /* Leave just the two low bits of the address. */
11780 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11781 NULL_RTX, 0, OPTAB_WIDEN);
11783 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11784 Pmode, 1, align_4_label);
11785 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11786 Pmode, 1, align_2_label);
11787 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11788 Pmode, 1, align_3_label);
11790 else
11792 /* Since the alignment is 2, we have to check either 2 or 0 bytes;
11793 check whether the address is already 4-byte aligned. */
11795 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11796 NULL_RTX, 0, OPTAB_WIDEN);
11798 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11799 Pmode, 1, align_4_label);
11802 mem = change_address (src, QImode, out);
11804 /* Now compare the bytes. */
11806 /* Compare the leading unaligned bytes, one byte at a time. */
11807 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11808 QImode, 1, end_0_label);
11810 /* Increment the address. */
11811 if (TARGET_64BIT)
11812 emit_insn (gen_adddi3 (out, out, const1_rtx));
11813 else
11814 emit_insn (gen_addsi3 (out, out, const1_rtx));
11816 /* Not needed with an alignment of 2 */
11817 if (align != 2)
11819 emit_label (align_2_label);
11821 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11822 end_0_label);
11824 if (TARGET_64BIT)
11825 emit_insn (gen_adddi3 (out, out, const1_rtx));
11826 else
11827 emit_insn (gen_addsi3 (out, out, const1_rtx));
11829 emit_label (align_3_label);
11832 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11833 end_0_label);
11835 if (TARGET_64BIT)
11836 emit_insn (gen_adddi3 (out, out, const1_rtx));
11837 else
11838 emit_insn (gen_addsi3 (out, out, const1_rtx));
11841 /* Generate a loop that checks 4 bytes at a time. It is not a good idea
11842 to align this loop; that only enlarges the code without making it
11843 any faster. */
11844 emit_label (align_4_label);
11846 mem = change_address (src, SImode, out);
11847 emit_move_insn (scratch, mem);
11848 if (TARGET_64BIT)
11849 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11850 else
11851 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11853 /* This formula yields a nonzero result iff one of the bytes is zero.
11854 This saves three branches inside the loop and many cycles. */
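/* Concretely, the loop computes (x - 0x01010101) & ~x & 0x80808080 for
   each word x: the result is nonzero iff x contains a zero byte, and the
   lowest set 0x80 bit marks the first (lowest-order) zero byte.  For
   example, x = 0x41420043 gives 0x00008000, flagging the zero in byte 1.  */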
11856 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11857 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11858 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11859 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11860 gen_int_mode (0x80808080, SImode)));
11861 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11862 align_4_label);
11864 if (TARGET_CMOVE)
11866 rtx reg = gen_reg_rtx (SImode);
11867 rtx reg2 = gen_reg_rtx (Pmode);
11868 emit_move_insn (reg, tmpreg);
11869 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11871 /* If zero is not in the first two bytes, move two bytes forward. */
11872 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11873 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11874 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11875 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11876 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11877 reg,
11878 tmpreg)));
11879 /* Emit lea manually to avoid clobbering of flags. */
11880 emit_insn (gen_rtx_SET (SImode, reg2,
11881 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11883 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11884 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11885 emit_insn (gen_rtx_SET (VOIDmode, out,
11886 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11887 reg2,
11888 out)));
11891 else
11893 rtx end_2_label = gen_label_rtx ();
11894 /* Is zero in the first two bytes? */
11896 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11897 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11898 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11899 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11900 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11901 pc_rtx);
11902 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11903 JUMP_LABEL (tmp) = end_2_label;
11905 /* Not in the first two. Move two bytes forward. */
11906 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11907 if (TARGET_64BIT)
11908 emit_insn (gen_adddi3 (out, out, const2_rtx));
11909 else
11910 emit_insn (gen_addsi3 (out, out, const2_rtx));
11912 emit_label (end_2_label);
11916 /* Avoid a branch when fixing up the final byte offset. */
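/* At this point OUT points 4 bytes past the 16-bit half-word that
   contains the terminating zero, and the low byte of tmpreg has its top
   bit set iff the first byte of that half-word is the zero.  addqi3_cc
   moves that bit into the carry flag, so the subtract-with-borrow below
   computes OUT - 3 - carry, which leaves OUT pointing exactly at the
   zero byte without a conditional branch.  */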
11917 tmpreg = gen_lowpart (QImode, tmpreg);
11918 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11919 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11920 if (TARGET_64BIT)
11921 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11922 else
11923 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11925 emit_label (end_0_label);
11928 void
11929 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11930 rtx callarg2 ATTRIBUTE_UNUSED,
11931 rtx pop, int sibcall)
11933 rtx use = NULL, call;
11935 if (pop == const0_rtx)
11936 pop = NULL;
11937 if (TARGET_64BIT && pop)
11938 abort ();
11940 #if TARGET_MACHO
11941 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11942 fnaddr = machopic_indirect_call_target (fnaddr);
11943 #else
11944 /* Static functions and indirect calls don't need the pic register. */
11945 if (! TARGET_64BIT && flag_pic
11946 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11947 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11948 use_reg (&use, pic_offset_table_rtx);
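/* For 64-bit varargs calls the ABI expects %al to hold an upper bound
   on the number of SSE registers used to pass arguments; CALLARG2
   carries that count here (presumably negative when the callee is not a
   varargs function, in which case %al is left untouched).  */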
11950 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11952 rtx al = gen_rtx_REG (QImode, 0);
11953 emit_move_insn (al, callarg2);
11954 use_reg (&use, al);
11956 #endif /* TARGET_MACHO */
11958 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11960 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11961 fnaddr = gen_rtx_MEM (QImode, fnaddr);
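/* For a 64-bit sibcall through a register, the target address must live
   in a register that is neither callee-saved nor used for argument
   passing, since the outgoing arguments and the caller's saved registers
   must remain intact; the call-clobbered scratch register R11 fits the
   bill.  */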
11963 if (sibcall && TARGET_64BIT
11964 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11966 rtx addr;
11967 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11968 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11969 emit_move_insn (fnaddr, addr);
11970 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11973 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11974 if (retval)
11975 call = gen_rtx_SET (VOIDmode, retval, call);
11976 if (pop)
11978 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11979 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11980 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11983 call = emit_call_insn (call);
11984 if (use)
11985 CALL_INSN_FUNCTION_USAGE (call) = use;
11989 /* Clear stack slot assignments remembered from previous functions.
11990 This is called from INIT_EXPANDERS once before RTL is emitted for each
11991 function. */
11993 static struct machine_function *
11994 ix86_init_machine_status (void)
11996 struct machine_function *f;
11998 f = ggc_alloc_cleared (sizeof (struct machine_function));
11999 f->use_fast_prologue_epilogue_nregs = -1;
12001 return f;
12004 /* Return a MEM corresponding to a stack slot with mode MODE.
12005 Allocate a new slot if necessary.
12007 The RTL for a function can have several slots available: N is
12008 which slot to use. */
12011 assign_386_stack_local (enum machine_mode mode, int n)
12013 struct stack_local_entry *s;
12015 if (n < 0 || n >= MAX_386_STACK_LOCALS)
12016 abort ();
12018 for (s = ix86_stack_locals; s; s = s->next)
12019 if (s->mode == mode && s->n == n)
12020 return s->rtl;
12022 s = (struct stack_local_entry *)
12023 ggc_alloc (sizeof (struct stack_local_entry));
12024 s->n = n;
12025 s->mode = mode;
12026 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12028 s->next = ix86_stack_locals;
12029 ix86_stack_locals = s;
12030 return s->rtl;
12033 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12035 static GTY(()) rtx ix86_tls_symbol;
12037 ix86_tls_get_addr (void)
12040 if (!ix86_tls_symbol)
12042 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12043 (TARGET_GNU_TLS && !TARGET_64BIT)
12044 ? "___tls_get_addr"
12045 : "__tls_get_addr");
12048 return ix86_tls_symbol;
12051 /* Calculate the length of the memory address in the instruction
12052 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12054 static int
12055 memory_address_length (rtx addr)
12057 struct ix86_address parts;
12058 rtx base, index, disp;
12059 int len;
12061 if (GET_CODE (addr) == PRE_DEC
12062 || GET_CODE (addr) == POST_INC
12063 || GET_CODE (addr) == PRE_MODIFY
12064 || GET_CODE (addr) == POST_MODIFY)
12065 return 0;
12067 if (! ix86_decompose_address (addr, &parts))
12068 abort ();
12070 base = parts.base;
12071 index = parts.index;
12072 disp = parts.disp;
12073 len = 0;
12075 /* Rule of thumb:
12076 - esp as the base always wants an index,
12077 - ebp as the base always wants a displacement. */
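/* For example, a plain (%esp) access needs a SIB byte and (%ebp) is
   encoded as 0(%ebp) with an 8-bit displacement, so each of those costs
   1 extra byte, while an absolute address or a base plus 32-bit
   displacement costs 4 extra bytes.  */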
12079 /* Register Indirect. */
12080 if (base && !index && !disp)
12082 /* esp (for its index) and ebp (for its displacement) need
12083 the two-byte modrm form. */
12084 if (addr == stack_pointer_rtx
12085 || addr == arg_pointer_rtx
12086 || addr == frame_pointer_rtx
12087 || addr == hard_frame_pointer_rtx)
12088 len = 1;
12091 /* Direct Addressing. */
12092 else if (disp && !base && !index)
12093 len = 4;
12095 else
12097 /* Find the length of the displacement constant. */
12098 if (disp)
12100 if (GET_CODE (disp) == CONST_INT
12101 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12102 && base)
12103 len = 1;
12104 else
12105 len = 4;
12107 /* ebp always wants a displacement. */
12108 else if (base == hard_frame_pointer_rtx)
12109 len = 1;
12111 /* An index requires the two-byte modrm form.... */
12112 if (index
12113 /* ...like esp, which always wants an index. */
12114 || base == stack_pointer_rtx
12115 || base == arg_pointer_rtx
12116 || base == frame_pointer_rtx)
12117 len += 1;
12120 return len;
12123 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12124 is set, expect that the insn has an 8-bit immediate alternative. */
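/* For example, "add $4, %eax" can use the sign-extended 8-bit immediate
   form and contributes one byte, while "add $1000, %eax" needs a
   full-size immediate (four bytes in SImode).  */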
12126 ix86_attr_length_immediate_default (rtx insn, int shortform)
12128 int len = 0;
12129 int i;
12130 extract_insn_cached (insn);
12131 for (i = recog_data.n_operands - 1; i >= 0; --i)
12132 if (CONSTANT_P (recog_data.operand[i]))
12134 if (len)
12135 abort ();
12136 if (shortform
12137 && GET_CODE (recog_data.operand[i]) == CONST_INT
12138 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12139 len = 1;
12140 else
12142 switch (get_attr_mode (insn))
12144 case MODE_QI:
12145 len+=1;
12146 break;
12147 case MODE_HI:
12148 len+=2;
12149 break;
12150 case MODE_SI:
12151 len+=4;
12152 break;
12153 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12154 case MODE_DI:
12155 len+=4;
12156 break;
12157 default:
12158 fatal_insn ("unknown insn mode", insn);
12162 return len;
12164 /* Compute default value for "length_address" attribute. */
12166 ix86_attr_length_address_default (rtx insn)
12168 int i;
12170 if (get_attr_type (insn) == TYPE_LEA)
12172 rtx set = PATTERN (insn);
12173 if (GET_CODE (set) == SET)
12175 else if (GET_CODE (set) == PARALLEL
12176 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12177 set = XVECEXP (set, 0, 0);
12178 else
12180 #ifdef ENABLE_CHECKING
12181 abort ();
12182 #endif
12183 return 0;
12186 return memory_address_length (SET_SRC (set));
12189 extract_insn_cached (insn);
12190 for (i = recog_data.n_operands - 1; i >= 0; --i)
12191 if (GET_CODE (recog_data.operand[i]) == MEM)
12193 return memory_address_length (XEXP (recog_data.operand[i], 0));
12194 break;
12196 return 0;
12199 /* Return the maximum number of instructions a cpu can issue. */
12201 static int
12202 ix86_issue_rate (void)
12204 switch (ix86_tune)
12206 case PROCESSOR_PENTIUM:
12207 case PROCESSOR_K6:
12208 return 2;
12210 case PROCESSOR_PENTIUMPRO:
12211 case PROCESSOR_PENTIUM4:
12212 case PROCESSOR_ATHLON:
12213 case PROCESSOR_K8:
12214 case PROCESSOR_NOCONA:
12215 return 3;
12217 default:
12218 return 1;
12222 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12223 set by DEP_INSN and nothing else that DEP_INSN sets. */
12225 static int
12226 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12228 rtx set, set2;
12230 /* Simplify the test for uninteresting insns. */
12231 if (insn_type != TYPE_SETCC
12232 && insn_type != TYPE_ICMOV
12233 && insn_type != TYPE_FCMOV
12234 && insn_type != TYPE_IBR)
12235 return 0;
12237 if ((set = single_set (dep_insn)) != 0)
12239 set = SET_DEST (set);
12240 set2 = NULL_RTX;
12242 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12243 && XVECLEN (PATTERN (dep_insn), 0) == 2
12244 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12245 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12247 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12248 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12250 else
12251 return 0;
12253 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12254 return 0;
12256 /* This test is true if the dependent insn reads the flags but
12257 not any other potentially set register. */
12258 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12259 return 0;
12261 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12262 return 0;
12264 return 1;
12267 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12268 address with operands set by DEP_INSN. */
12270 static int
12271 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12273 rtx addr;
12275 if (insn_type == TYPE_LEA
12276 && TARGET_PENTIUM)
12278 addr = PATTERN (insn);
12279 if (GET_CODE (addr) == SET)
12281 else if (GET_CODE (addr) == PARALLEL
12282 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12283 addr = XVECEXP (addr, 0, 0);
12284 else
12285 abort ();
12286 addr = SET_SRC (addr);
12288 else
12290 int i;
12291 extract_insn_cached (insn);
12292 for (i = recog_data.n_operands - 1; i >= 0; --i)
12293 if (GET_CODE (recog_data.operand[i]) == MEM)
12295 addr = XEXP (recog_data.operand[i], 0);
12296 goto found;
12298 return 0;
12299 found:;
12302 return modified_in_p (addr, dep_insn);
12305 static int
12306 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12308 enum attr_type insn_type, dep_insn_type;
12309 enum attr_memory memory;
12310 rtx set, set2;
12311 int dep_insn_code_number;
12313 /* Anti and output dependencies have zero cost on all CPUs. */
12314 if (REG_NOTE_KIND (link) != 0)
12315 return 0;
12317 dep_insn_code_number = recog_memoized (dep_insn);
12319 /* If we can't recognize the insns, we can't really do anything. */
12320 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12321 return cost;
12323 insn_type = get_attr_type (insn);
12324 dep_insn_type = get_attr_type (dep_insn);
12326 switch (ix86_tune)
12328 case PROCESSOR_PENTIUM:
12329 /* Address Generation Interlock adds a cycle of latency. */
12330 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12331 cost += 1;
12333 /* ??? Compares pair with jump/setcc. */
12334 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12335 cost = 0;
12337 /* Floating point stores require value to be ready one cycle earlier. */
12338 if (insn_type == TYPE_FMOV
12339 && get_attr_memory (insn) == MEMORY_STORE
12340 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12341 cost += 1;
12342 break;
12344 case PROCESSOR_PENTIUMPRO:
12345 memory = get_attr_memory (insn);
12347 /* INT->FP conversion is expensive. */
12348 if (get_attr_fp_int_src (dep_insn))
12349 cost += 5;
12351 /* There is one cycle extra latency between an FP op and a store. */
12352 if (insn_type == TYPE_FMOV
12353 && (set = single_set (dep_insn)) != NULL_RTX
12354 && (set2 = single_set (insn)) != NULL_RTX
12355 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12356 && GET_CODE (SET_DEST (set2)) == MEM)
12357 cost += 1;
12359 /* Model the ability of the reorder buffer to hide the latency of a load
12360 by executing it in parallel with the previous instruction, provided
12361 the previous instruction is not needed to compute the address. */
12362 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12363 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12365 /* Claim that moves take one cycle, as the core can issue one load
12366 at a time and the next load can start a cycle later. */
12367 if (dep_insn_type == TYPE_IMOV
12368 || dep_insn_type == TYPE_FMOV)
12369 cost = 1;
12370 else if (cost > 1)
12371 cost--;
12373 break;
12375 case PROCESSOR_K6:
12376 memory = get_attr_memory (insn);
12378 /* The esp dependency is resolved before the instruction is really
12379 finished. */
12380 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12381 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12382 return 1;
12384 /* INT->FP conversion is expensive. */
12385 if (get_attr_fp_int_src (dep_insn))
12386 cost += 5;
12388 /* Model the ability of the reorder buffer to hide the latency of a load
12389 by executing it in parallel with the previous instruction, provided
12390 the previous instruction is not needed to compute the address. */
12391 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12392 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12394 /* Claim that moves take one cycle, as the core can issue one load
12395 at a time and the next load can start a cycle later. */
12396 if (dep_insn_type == TYPE_IMOV
12397 || dep_insn_type == TYPE_FMOV)
12398 cost = 1;
12399 else if (cost > 2)
12400 cost -= 2;
12401 else
12402 cost = 1;
12404 break;
12406 case PROCESSOR_ATHLON:
12407 case PROCESSOR_K8:
12408 memory = get_attr_memory (insn);
12410 /* Model the ability of the reorder buffer to hide the latency of a load
12411 by executing it in parallel with the previous instruction, provided
12412 the previous instruction is not needed to compute the address. */
12413 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12414 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12416 enum attr_unit unit = get_attr_unit (insn);
12417 int loadcost = 3;
12419 /* Because of the difference between the length of integer and
12420 floating unit pipeline preparation stages, the memory operands
12421 for floating point are cheaper.
12423 ??? For Athlon the difference is most probably 2. */
12424 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12425 loadcost = 3;
12426 else
12427 loadcost = TARGET_ATHLON ? 2 : 0;
12429 if (cost >= loadcost)
12430 cost -= loadcost;
12431 else
12432 cost = 0;
12435 default:
12436 break;
12439 return cost;
12442 /* How many alternative schedules to try. This should be as wide as the
12443 scheduling freedom in the DFA, but no wider. Making this value too
12444 large results in extra work for the scheduler. */
12446 static int
12447 ia32_multipass_dfa_lookahead (void)
12449 if (ix86_tune == PROCESSOR_PENTIUM)
12450 return 2;
12452 if (ix86_tune == PROCESSOR_PENTIUMPRO
12453 || ix86_tune == PROCESSOR_K6)
12454 return 1;
12456 else
12457 return 0;
12461 /* Compute the alignment given to a constant that is being placed in memory.
12462 EXP is the constant and ALIGN is the alignment that the object would
12463 ordinarily have.
12464 The value of this function is used instead of that alignment to align
12465 the object. */
12468 ix86_constant_alignment (tree exp, int align)
12470 if (TREE_CODE (exp) == REAL_CST)
12472 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12473 return 64;
12474 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12475 return 128;
12477 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12478 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12479 return BITS_PER_WORD;
12481 return align;
12484 /* Compute the alignment for a static variable.
12485 TYPE is the data type, and ALIGN is the alignment that
12486 the object would ordinarily have. The value of this function is used
12487 instead of that alignment to align the object. */
12490 ix86_data_alignment (tree type, int align)
12492 if (AGGREGATE_TYPE_P (type)
12493 && TYPE_SIZE (type)
12494 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12495 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12496 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12497 return 256;
12499 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12500 to a 16-byte boundary. */
12501 if (TARGET_64BIT)
12503 if (AGGREGATE_TYPE_P (type)
12504 && TYPE_SIZE (type)
12505 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12506 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12507 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12508 return 128;
12511 if (TREE_CODE (type) == ARRAY_TYPE)
12513 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12514 return 64;
12515 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12516 return 128;
12518 else if (TREE_CODE (type) == COMPLEX_TYPE)
12521 if (TYPE_MODE (type) == DCmode && align < 64)
12522 return 64;
12523 if (TYPE_MODE (type) == XCmode && align < 128)
12524 return 128;
12526 else if ((TREE_CODE (type) == RECORD_TYPE
12527 || TREE_CODE (type) == UNION_TYPE
12528 || TREE_CODE (type) == QUAL_UNION_TYPE)
12529 && TYPE_FIELDS (type))
12531 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12532 return 64;
12533 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12534 return 128;
12536 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12537 || TREE_CODE (type) == INTEGER_TYPE)
12539 if (TYPE_MODE (type) == DFmode && align < 64)
12540 return 64;
12541 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12542 return 128;
12545 return align;
12548 /* Compute the alignment for a local variable.
12549 TYPE is the data type, and ALIGN is the alignment that
12550 the object would ordinarily have. The value of this macro is used
12551 instead of that alignment to align the object. */
12554 ix86_local_alignment (tree type, int align)
12556 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12557 to a 16-byte boundary. */
12558 if (TARGET_64BIT)
12560 if (AGGREGATE_TYPE_P (type)
12561 && TYPE_SIZE (type)
12562 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12563 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12564 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12565 return 128;
12567 if (TREE_CODE (type) == ARRAY_TYPE)
12569 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12570 return 64;
12571 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12572 return 128;
12574 else if (TREE_CODE (type) == COMPLEX_TYPE)
12576 if (TYPE_MODE (type) == DCmode && align < 64)
12577 return 64;
12578 if (TYPE_MODE (type) == XCmode && align < 128)
12579 return 128;
12581 else if ((TREE_CODE (type) == RECORD_TYPE
12582 || TREE_CODE (type) == UNION_TYPE
12583 || TREE_CODE (type) == QUAL_UNION_TYPE)
12584 && TYPE_FIELDS (type))
12586 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12587 return 64;
12588 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12589 return 128;
12591 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12592 || TREE_CODE (type) == INTEGER_TYPE)
12595 if (TYPE_MODE (type) == DFmode && align < 64)
12596 return 64;
12597 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12598 return 128;
12600 return align;
12603 /* Emit RTL insns to initialize the variable parts of a trampoline.
12604 FNADDR is an RTX for the address of the function's pure code.
12605 CXT is an RTX for the static chain value for the function. */
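/* The 32-bit trampoline emitted below is simply
       movl $CXT, %ecx          (0xb9 imm32)
       jmp  FNADDR              (0xe9 rel32)
   with the jump displacement taken relative to the end of the 10-byte
   sequence; %ecx is the static chain register on ia32.  The 64-bit
   variant loads FNADDR into %r11 and CXT into %r10 (the 64-bit static
   chain register) and ends with an indirect jmp *%r11.  */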
12606 void
12607 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12609 if (!TARGET_64BIT)
12611 /* Compute offset from the end of the jmp to the target function. */
12612 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12613 plus_constant (tramp, 10),
12614 NULL_RTX, 1, OPTAB_DIRECT);
12615 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12616 gen_int_mode (0xb9, QImode));
12617 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12618 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12619 gen_int_mode (0xe9, QImode));
12620 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12622 else
12624 int offset = 0;
12625 /* Try to load the address using the shorter movl instead of movabs.
12626 We may want to support movq for kernel mode, but the kernel does not
12627 use trampolines at the moment. */
12628 if (x86_64_zero_extended_value (fnaddr))
12630 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12631 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12632 gen_int_mode (0xbb41, HImode));
12633 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12634 gen_lowpart (SImode, fnaddr));
12635 offset += 6;
12637 else
12639 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12640 gen_int_mode (0xbb49, HImode));
12641 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12642 fnaddr);
12643 offset += 10;
12645 /* Load static chain using movabs to r10. */
12646 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12647 gen_int_mode (0xba49, HImode));
12648 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12649 cxt);
12650 offset += 10;
12651 /* Jump to %r11. */
12652 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12653 gen_int_mode (0xff49, HImode));
12654 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12655 gen_int_mode (0xe3, QImode));
12656 offset += 3;
12657 if (offset > TRAMPOLINE_SIZE)
12658 abort ();
12661 #ifdef ENABLE_EXECUTE_STACK
12662 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12663 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12664 #endif
12667 #define def_builtin(MASK, NAME, TYPE, CODE) \
12668 do { \
12669 if ((MASK) & target_flags \
12670 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12671 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12672 NULL, NULL_TREE); \
12673 } while (0)
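/* For instance, a (hypothetical) use such as
     def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat,
                  IX86_BUILTIN_LOADUPS);
   registers the builtin only when -msse is enabled, and any builtin whose
   mask includes MASK_64BIT is additionally restricted to TARGET_64BIT.  */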
12675 struct builtin_description
12677 const unsigned int mask;
12678 const enum insn_code icode;
12679 const char *const name;
12680 const enum ix86_builtins code;
12681 const enum rtx_code comparison;
12682 const unsigned int flag;
12685 static const struct builtin_description bdesc_comi[] =
12687 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12688 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12689 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12690 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12691 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12692 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12693 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12694 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12695 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12696 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12697 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12698 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12701 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12704 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12705 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12706 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12707 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12708 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12709 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12710 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12713 static const struct builtin_description bdesc_2arg[] =
12715 /* SSE */
12716 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12717 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12718 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12719 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12720 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12721 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12722 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12723 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12725 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12726 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12727 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12728 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12729 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12730 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12731 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12732 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12733 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12734 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12735 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12736 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12737 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12738 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12739 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12740 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12741 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12742 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12743 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12744 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12746 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12747 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12748 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12749 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12751 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12752 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12753 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12754 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12756 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12757 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12758 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12759 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12760 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12762 /* MMX */
12763 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12764 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12765 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12767 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12768 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12769 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12770 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12772 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12773 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12774 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12775 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12776 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12777 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12778 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12779 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12781 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12782 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12785 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12786 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12787 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12788 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12790 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12791 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12793 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12794 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12795 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12796 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12797 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12798 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12800 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12801 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12802 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12805 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12806 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12807 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12809 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12812 /* Special. */
12813 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12814 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12815 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12817 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12819 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12821 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12822 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12823 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12824 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12825 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12826 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12828 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12829 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12830 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12831 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12832 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12833 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12835 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12836 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12837 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12838 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12840 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12841 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12843 /* SSE2 */
12844 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12854 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12855 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12856 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12857 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12858 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12859 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12860 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12861 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12862 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12863 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12864 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12865 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12866 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12867 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12868 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12869 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12870 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12871 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12872 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12874 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12888 /* SSE2 MMX */
12889 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12898 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12899 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12900 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12901 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12902 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12903 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12904 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12905 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12971 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12975 /* SSE3 MMX */
12976 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12977 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12978 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12979 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12980 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12981 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12984 static const struct builtin_description bdesc_1arg[] =
12986 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12987 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12989 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12990 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12991 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12993 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12994 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12995 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12996 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12997 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12998 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13020 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13021 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13029 /* SSE3 */
13030 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13031 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13032 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13035 void
13036 ix86_init_builtins (void)
13038 if (TARGET_MMX)
13039 ix86_init_mmx_sse_builtins ();
13042 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13043 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
13044 builtins are defined. */
13045 static void
13046 ix86_init_mmx_sse_builtins (void)
13048 const struct builtin_description * d;
13049 size_t i;
13051 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13052 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13053 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13054 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13055 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13056 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13057 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13058 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13059 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13060 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13062 tree pchar_type_node = build_pointer_type (char_type_node);
13063 tree pcchar_type_node = build_pointer_type (
13064 build_type_variant (char_type_node, 1, 0));
13065 tree pfloat_type_node = build_pointer_type (float_type_node);
13066 tree pcfloat_type_node = build_pointer_type (
13067 build_type_variant (float_type_node, 1, 0));
13068 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13069 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13070 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13072 /* Comparisons. */
13073 tree int_ftype_v4sf_v4sf
13074 = build_function_type_list (integer_type_node,
13075 V4SF_type_node, V4SF_type_node, NULL_TREE);
13076 tree v4si_ftype_v4sf_v4sf
13077 = build_function_type_list (V4SI_type_node,
13078 V4SF_type_node, V4SF_type_node, NULL_TREE);
13079 /* MMX/SSE/integer conversions. */
13080 tree int_ftype_v4sf
13081 = build_function_type_list (integer_type_node,
13082 V4SF_type_node, NULL_TREE);
13083 tree int64_ftype_v4sf
13084 = build_function_type_list (long_long_integer_type_node,
13085 V4SF_type_node, NULL_TREE);
13086 tree int_ftype_v8qi
13087 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13088 tree v4sf_ftype_v4sf_int
13089 = build_function_type_list (V4SF_type_node,
13090 V4SF_type_node, integer_type_node, NULL_TREE);
13091 tree v4sf_ftype_v4sf_int64
13092 = build_function_type_list (V4SF_type_node,
13093 V4SF_type_node, long_long_integer_type_node,
13094 NULL_TREE);
13095 tree v4sf_ftype_v4sf_v2si
13096 = build_function_type_list (V4SF_type_node,
13097 V4SF_type_node, V2SI_type_node, NULL_TREE);
13098 tree int_ftype_v4hi_int
13099 = build_function_type_list (integer_type_node,
13100 V4HI_type_node, integer_type_node, NULL_TREE);
13101 tree v4hi_ftype_v4hi_int_int
13102 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13103 integer_type_node, integer_type_node,
13104 NULL_TREE);
13105 /* Miscellaneous. */
13106 tree v8qi_ftype_v4hi_v4hi
13107 = build_function_type_list (V8QI_type_node,
13108 V4HI_type_node, V4HI_type_node, NULL_TREE);
13109 tree v4hi_ftype_v2si_v2si
13110 = build_function_type_list (V4HI_type_node,
13111 V2SI_type_node, V2SI_type_node, NULL_TREE);
13112 tree v4sf_ftype_v4sf_v4sf_int
13113 = build_function_type_list (V4SF_type_node,
13114 V4SF_type_node, V4SF_type_node,
13115 integer_type_node, NULL_TREE);
13116 tree v2si_ftype_v4hi_v4hi
13117 = build_function_type_list (V2SI_type_node,
13118 V4HI_type_node, V4HI_type_node, NULL_TREE);
13119 tree v4hi_ftype_v4hi_int
13120 = build_function_type_list (V4HI_type_node,
13121 V4HI_type_node, integer_type_node, NULL_TREE);
13122 tree v4hi_ftype_v4hi_di
13123 = build_function_type_list (V4HI_type_node,
13124 V4HI_type_node, long_long_unsigned_type_node,
13125 NULL_TREE);
13126 tree v2si_ftype_v2si_di
13127 = build_function_type_list (V2SI_type_node,
13128 V2SI_type_node, long_long_unsigned_type_node,
13129 NULL_TREE);
13130 tree void_ftype_void
13131 = build_function_type (void_type_node, void_list_node);
13132 tree void_ftype_unsigned
13133 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13134 tree void_ftype_unsigned_unsigned
13135 = build_function_type_list (void_type_node, unsigned_type_node,
13136 unsigned_type_node, NULL_TREE);
13137 tree void_ftype_pcvoid_unsigned_unsigned
13138 = build_function_type_list (void_type_node, const_ptr_type_node,
13139 unsigned_type_node, unsigned_type_node,
13140 NULL_TREE);
13141 tree unsigned_ftype_void
13142 = build_function_type (unsigned_type_node, void_list_node);
13143 tree di_ftype_void
13144 = build_function_type (long_long_unsigned_type_node, void_list_node);
13145 tree v4sf_ftype_void
13146 = build_function_type (V4SF_type_node, void_list_node);
13147 tree v2si_ftype_v4sf
13148 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13149 /* Loads/stores. */
13150 tree void_ftype_v8qi_v8qi_pchar
13151 = build_function_type_list (void_type_node,
13152 V8QI_type_node, V8QI_type_node,
13153 pchar_type_node, NULL_TREE);
13154 tree v4sf_ftype_pcfloat
13155 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13156 /* @@@ the type is bogus */
13157 tree v4sf_ftype_v4sf_pv2si
13158 = build_function_type_list (V4SF_type_node,
13159 V4SF_type_node, pv2si_type_node, NULL_TREE);
13160 tree void_ftype_pv2si_v4sf
13161 = build_function_type_list (void_type_node,
13162 pv2si_type_node, V4SF_type_node, NULL_TREE);
13163 tree void_ftype_pfloat_v4sf
13164 = build_function_type_list (void_type_node,
13165 pfloat_type_node, V4SF_type_node, NULL_TREE);
13166 tree void_ftype_pdi_di
13167 = build_function_type_list (void_type_node,
13168 pdi_type_node, long_long_unsigned_type_node,
13169 NULL_TREE);
13170 tree void_ftype_pv2di_v2di
13171 = build_function_type_list (void_type_node,
13172 pv2di_type_node, V2DI_type_node, NULL_TREE);
13173 /* Normal vector unops. */
13174 tree v4sf_ftype_v4sf
13175 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13177 /* Normal vector binops. */
13178 tree v4sf_ftype_v4sf_v4sf
13179 = build_function_type_list (V4SF_type_node,
13180 V4SF_type_node, V4SF_type_node, NULL_TREE);
13181 tree v8qi_ftype_v8qi_v8qi
13182 = build_function_type_list (V8QI_type_node,
13183 V8QI_type_node, V8QI_type_node, NULL_TREE);
13184 tree v4hi_ftype_v4hi_v4hi
13185 = build_function_type_list (V4HI_type_node,
13186 V4HI_type_node, V4HI_type_node, NULL_TREE);
13187 tree v2si_ftype_v2si_v2si
13188 = build_function_type_list (V2SI_type_node,
13189 V2SI_type_node, V2SI_type_node, NULL_TREE);
13190 tree di_ftype_di_di
13191 = build_function_type_list (long_long_unsigned_type_node,
13192 long_long_unsigned_type_node,
13193 long_long_unsigned_type_node, NULL_TREE);
13195 tree v2si_ftype_v2sf
13196 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13197 tree v2sf_ftype_v2si
13198 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13199 tree v2si_ftype_v2si
13200 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13201 tree v2sf_ftype_v2sf
13202 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13203 tree v2sf_ftype_v2sf_v2sf
13204 = build_function_type_list (V2SF_type_node,
13205 V2SF_type_node, V2SF_type_node, NULL_TREE);
13206 tree v2si_ftype_v2sf_v2sf
13207 = build_function_type_list (V2SI_type_node,
13208 V2SF_type_node, V2SF_type_node, NULL_TREE);
13209 tree pint_type_node = build_pointer_type (integer_type_node);
13210 tree pcint_type_node = build_pointer_type (
13211 build_type_variant (integer_type_node, 1, 0));
13212 tree pdouble_type_node = build_pointer_type (double_type_node);
13213 tree pcdouble_type_node = build_pointer_type (
13214 build_type_variant (double_type_node, 1, 0));
13215 tree int_ftype_v2df_v2df
13216 = build_function_type_list (integer_type_node,
13217 V2DF_type_node, V2DF_type_node, NULL_TREE);
13219 tree ti_ftype_void
13220 = build_function_type (intTI_type_node, void_list_node);
13221 tree v2di_ftype_void
13222 = build_function_type (V2DI_type_node, void_list_node);
13223 tree ti_ftype_ti_ti
13224 = build_function_type_list (intTI_type_node,
13225 intTI_type_node, intTI_type_node, NULL_TREE);
13226 tree void_ftype_pcvoid
13227 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13228 tree v2di_ftype_di
13229 = build_function_type_list (V2DI_type_node,
13230 long_long_unsigned_type_node, NULL_TREE);
13231 tree di_ftype_v2di
13232 = build_function_type_list (long_long_unsigned_type_node,
13233 V2DI_type_node, NULL_TREE);
13234 tree v4sf_ftype_v4si
13235 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13236 tree v4si_ftype_v4sf
13237 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13238 tree v2df_ftype_v4si
13239 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13240 tree v4si_ftype_v2df
13241 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13242 tree v2si_ftype_v2df
13243 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13244 tree v4sf_ftype_v2df
13245 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13246 tree v2df_ftype_v2si
13247 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13248 tree v2df_ftype_v4sf
13249 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13250 tree int_ftype_v2df
13251 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13252 tree int64_ftype_v2df
13253 = build_function_type_list (long_long_integer_type_node,
13254 V2DF_type_node, NULL_TREE);
13255 tree v2df_ftype_v2df_int
13256 = build_function_type_list (V2DF_type_node,
13257 V2DF_type_node, integer_type_node, NULL_TREE);
13258 tree v2df_ftype_v2df_int64
13259 = build_function_type_list (V2DF_type_node,
13260 V2DF_type_node, long_long_integer_type_node,
13261 NULL_TREE);
13262 tree v4sf_ftype_v4sf_v2df
13263 = build_function_type_list (V4SF_type_node,
13264 V4SF_type_node, V2DF_type_node, NULL_TREE);
13265 tree v2df_ftype_v2df_v4sf
13266 = build_function_type_list (V2DF_type_node,
13267 V2DF_type_node, V4SF_type_node, NULL_TREE);
13268 tree v2df_ftype_v2df_v2df_int
13269 = build_function_type_list (V2DF_type_node,
13270 V2DF_type_node, V2DF_type_node,
13271 integer_type_node,
13272 NULL_TREE);
13273 tree v2df_ftype_v2df_pv2si
13274 = build_function_type_list (V2DF_type_node,
13275 V2DF_type_node, pv2si_type_node, NULL_TREE);
13276 tree void_ftype_pv2si_v2df
13277 = build_function_type_list (void_type_node,
13278 pv2si_type_node, V2DF_type_node, NULL_TREE);
13279 tree void_ftype_pdouble_v2df
13280 = build_function_type_list (void_type_node,
13281 pdouble_type_node, V2DF_type_node, NULL_TREE);
13282 tree void_ftype_pint_int
13283 = build_function_type_list (void_type_node,
13284 pint_type_node, integer_type_node, NULL_TREE);
13285 tree void_ftype_v16qi_v16qi_pchar
13286 = build_function_type_list (void_type_node,
13287 V16QI_type_node, V16QI_type_node,
13288 pchar_type_node, NULL_TREE);
13289 tree v2df_ftype_pcdouble
13290 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13291 tree v2df_ftype_v2df_v2df
13292 = build_function_type_list (V2DF_type_node,
13293 V2DF_type_node, V2DF_type_node, NULL_TREE);
13294 tree v16qi_ftype_v16qi_v16qi
13295 = build_function_type_list (V16QI_type_node,
13296 V16QI_type_node, V16QI_type_node, NULL_TREE);
13297 tree v8hi_ftype_v8hi_v8hi
13298 = build_function_type_list (V8HI_type_node,
13299 V8HI_type_node, V8HI_type_node, NULL_TREE);
13300 tree v4si_ftype_v4si_v4si
13301 = build_function_type_list (V4SI_type_node,
13302 V4SI_type_node, V4SI_type_node, NULL_TREE);
13303 tree v2di_ftype_v2di_v2di
13304 = build_function_type_list (V2DI_type_node,
13305 V2DI_type_node, V2DI_type_node, NULL_TREE);
13306 tree v2di_ftype_v2df_v2df
13307 = build_function_type_list (V2DI_type_node,
13308 V2DF_type_node, V2DF_type_node, NULL_TREE);
13309 tree v2df_ftype_v2df
13310 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13311 tree v2df_ftype_double
13312 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13313 tree v2df_ftype_double_double
13314 = build_function_type_list (V2DF_type_node,
13315 double_type_node, double_type_node, NULL_TREE);
13316 tree int_ftype_v8hi_int
13317 = build_function_type_list (integer_type_node,
13318 V8HI_type_node, integer_type_node, NULL_TREE);
13319 tree v8hi_ftype_v8hi_int_int
13320 = build_function_type_list (V8HI_type_node,
13321 V8HI_type_node, integer_type_node,
13322 integer_type_node, NULL_TREE);
13323 tree v2di_ftype_v2di_int
13324 = build_function_type_list (V2DI_type_node,
13325 V2DI_type_node, integer_type_node, NULL_TREE);
13326 tree v4si_ftype_v4si_int
13327 = build_function_type_list (V4SI_type_node,
13328 V4SI_type_node, integer_type_node, NULL_TREE);
13329 tree v8hi_ftype_v8hi_int
13330 = build_function_type_list (V8HI_type_node,
13331 V8HI_type_node, integer_type_node, NULL_TREE);
13332 tree v8hi_ftype_v8hi_v2di
13333 = build_function_type_list (V8HI_type_node,
13334 V8HI_type_node, V2DI_type_node, NULL_TREE);
13335 tree v4si_ftype_v4si_v2di
13336 = build_function_type_list (V4SI_type_node,
13337 V4SI_type_node, V2DI_type_node, NULL_TREE);
13338 tree v4si_ftype_v8hi_v8hi
13339 = build_function_type_list (V4SI_type_node,
13340 V8HI_type_node, V8HI_type_node, NULL_TREE);
13341 tree di_ftype_v8qi_v8qi
13342 = build_function_type_list (long_long_unsigned_type_node,
13343 V8QI_type_node, V8QI_type_node, NULL_TREE);
13344 tree di_ftype_v2si_v2si
13345 = build_function_type_list (long_long_unsigned_type_node,
13346 V2SI_type_node, V2SI_type_node, NULL_TREE);
13347 tree v2di_ftype_v16qi_v16qi
13348 = build_function_type_list (V2DI_type_node,
13349 V16QI_type_node, V16QI_type_node, NULL_TREE);
13350 tree v2di_ftype_v4si_v4si
13351 = build_function_type_list (V2DI_type_node,
13352 V4SI_type_node, V4SI_type_node, NULL_TREE);
13353 tree int_ftype_v16qi
13354 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13355 tree v16qi_ftype_pcchar
13356 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13357 tree void_ftype_pchar_v16qi
13358 = build_function_type_list (void_type_node,
13359 pchar_type_node, V16QI_type_node, NULL_TREE);
13360 tree v4si_ftype_pcint
13361 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13362 tree void_ftype_pcint_v4si
13363 = build_function_type_list (void_type_node,
13364 pcint_type_node, V4SI_type_node, NULL_TREE);
13365 tree v2di_ftype_v2di
13366 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13368 tree float80_type;
13369 tree float128_type;
13371 /* The __float80 type. */
13372 if (TYPE_MODE (long_double_type_node) == XFmode)
13373 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13374 "__float80");
13375 else
13376 {
13377 /* The __float80 type. */
13378 float80_type = make_node (REAL_TYPE);
13379 TYPE_PRECISION (float80_type) = 96;
13380 layout_type (float80_type);
13381 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13382 }
13384 float128_type = make_node (REAL_TYPE);
13385 TYPE_PRECISION (float128_type) = 128;
13386 layout_type (float128_type);
13387 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
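/* Illustrative example (not part of the original file): once the
   register_builtin_type calls above have run, i386 C code can spell the
   extended types directly, e.g.

       __float80 e = 1.0;

   The TYPE_PRECISION of 96 set above reflects the 12-byte in-memory
   layout used for the 80-bit format on IA-32, not the number of
   significant bits.  */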
13389 /* Add all builtins that are more or less simple operations on two
13390 operands. */
13391 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13392 {
13393 /* Use one of the operands; the target can have a different mode for
13394 mask-generating compares. */
13395 enum machine_mode mode;
13396 tree type;
13398 if (d->name == 0)
13399 continue;
13400 mode = insn_data[d->icode].operand[1].mode;
13402 switch (mode)
13403 {
13404 case V16QImode:
13405 type = v16qi_ftype_v16qi_v16qi;
13406 break;
13407 case V8HImode:
13408 type = v8hi_ftype_v8hi_v8hi;
13409 break;
13410 case V4SImode:
13411 type = v4si_ftype_v4si_v4si;
13412 break;
13413 case V2DImode:
13414 type = v2di_ftype_v2di_v2di;
13415 break;
13416 case V2DFmode:
13417 type = v2df_ftype_v2df_v2df;
13418 break;
13419 case TImode:
13420 type = ti_ftype_ti_ti;
13421 break;
13422 case V4SFmode:
13423 type = v4sf_ftype_v4sf_v4sf;
13424 break;
13425 case V8QImode:
13426 type = v8qi_ftype_v8qi_v8qi;
13427 break;
13428 case V4HImode:
13429 type = v4hi_ftype_v4hi_v4hi;
13430 break;
13431 case V2SImode:
13432 type = v2si_ftype_v2si_v2si;
13433 break;
13434 case DImode:
13435 type = di_ftype_di_di;
13436 break;
13438 default:
13439 abort ();
13440 }
13442 /* Override for comparisons. */
13443 if (d->icode == CODE_FOR_maskcmpv4sf3
13444 || d->icode == CODE_FOR_maskncmpv4sf3
13445 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13446 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13447 type = v4si_ftype_v4sf_v4sf;
13449 if (d->icode == CODE_FOR_maskcmpv2df3
13450 || d->icode == CODE_FOR_maskncmpv2df3
13451 || d->icode == CODE_FOR_vmmaskcmpv2df3
13452 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13453 type = v2di_ftype_v2df_v2df;
13455 def_builtin (d->mask, d->name, type, d->code);
13456 }
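/* Worked illustration (a hedged sketch; the builtin_description layout is
   defined earlier in this file): a typical bdesc_2arg entry looks roughly
   like

       { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw",
         IX86_BUILTIN_PADDW, 0, 0 }

   For such an entry insn_data[CODE_FOR_addv4hi3].operand[1].mode is
   V4HImode, so the switch above picks v4hi_ftype_v4hi_v4hi and def_builtin
   registers a V4HI x V4HI -> V4HI builtin under that name.  */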
13458 /* Add the remaining MMX insns with somewhat more complicated types. */
13459 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13460 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13461 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13462 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13463 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13465 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13466 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13467 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13469 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13470 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13472 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13473 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13475 /* comi/ucomi insns. */
13476 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13477 if (d->mask == MASK_SSE2)
13478 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13479 else
13480 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13482 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13483 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13484 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13486 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13487 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13488 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13489 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13490 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13491 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13492 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13493 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13494 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13495 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13496 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13498 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13499 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13501 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13503 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13504 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13505 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13506 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13507 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13508 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13510 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13511 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13512 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13513 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13515 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13516 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13517 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13518 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13520 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13522 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13524 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13525 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13526 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13527 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13528 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13529 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13531 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13533 /* Original 3DNow! */
13534 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13535 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13536 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13537 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13538 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13539 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13540 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13541 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13542 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13543 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13544 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13545 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13546 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13547 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13548 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13549 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13550 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13551 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13552 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13553 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13555 /* 3DNow! extension as used in the Athlon CPU. */
13556 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13557 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13558 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13559 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13560 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13561 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13563 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13565 /* SSE2 */
13566 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13614 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13615 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13622 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13644 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13646 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13648 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13677 /* Prescott New Instructions. */
13678 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13679 void_ftype_pcvoid_unsigned_unsigned,
13680 IX86_BUILTIN_MONITOR);
13681 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13682 void_ftype_unsigned_unsigned,
13683 IX86_BUILTIN_MWAIT);
13684 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13685 v4sf_ftype_v4sf,
13686 IX86_BUILTIN_MOVSHDUP);
13687 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13688 v4sf_ftype_v4sf,
13689 IX86_BUILTIN_MOVSLDUP);
13690 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13691 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13692 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13693 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13694 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13695 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13696 }
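/* Usage sketch (illustrative): with -msse3 the builtins just defined back
   the <pmmintrin.h> intrinsics, e.g.

       #include <pmmintrin.h>
       __m128i load_any (void const *p)
       {
         return _mm_lddqu_si128 ((__m128i const *) p);
       }

   _mm_lddqu_si128 lowers to __builtin_ia32_lddqu, declared above with the
   v16qi_ftype_pcchar signature.  */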
13698 /* Errors in the source file can cause expand_expr to return const0_rtx
13699 where we expect a vector. To avoid crashing, use one of the vector
13700 clear instructions. */
13701 static rtx
13702 safe_vector_operand (rtx x, enum machine_mode mode)
13703 {
13704 if (x != const0_rtx)
13705 return x;
13706 x = gen_reg_rtx (mode);
13708 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13709 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13710 : gen_rtx_SUBREG (DImode, x, 0)));
13711 else
13712 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13713 : gen_rtx_SUBREG (V4SFmode, x, 0),
13714 CONST0_RTX (V4SFmode)));
13715 return x;
13716 }
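/* Illustrative scenario (hedged, not taken from the original sources):
   source code that passes the wrong kind of operand to one of these
   builtins, for instance

       __builtin_ia32_paddb (0, 0);

   is diagnosed by the front end, but expand_expr may still hand the
   expanders const0_rtx instead of a vector value.  safe_vector_operand
   substitutes a freshly cleared MMX or SSE register so the GEN_FCN calls
   below never see a bare const0_rtx of the wrong mode.  */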
13718 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13720 static rtx
13721 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13722 {
13723 rtx pat;
13724 tree arg0 = TREE_VALUE (arglist);
13725 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13726 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13727 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13728 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13729 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13730 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13732 if (VECTOR_MODE_P (mode0))
13733 op0 = safe_vector_operand (op0, mode0);
13734 if (VECTOR_MODE_P (mode1))
13735 op1 = safe_vector_operand (op1, mode1);
13737 if (! target
13738 || GET_MODE (target) != tmode
13739 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13740 target = gen_reg_rtx (tmode);
13742 if (GET_MODE (op1) == SImode && mode1 == TImode)
13743 {
13744 rtx x = gen_reg_rtx (V4SImode);
13745 emit_insn (gen_sse2_loadd (x, op1));
13746 op1 = gen_lowpart (TImode, x);
13747 }
13749 /* In case the insn wants input operands in modes different from
13750 the result, abort. */
13751 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13752 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13753 abort ();
13755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13756 op0 = copy_to_mode_reg (mode0, op0);
13757 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13758 op1 = copy_to_mode_reg (mode1, op1);
13760 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13761 yet one of the two must not be a memory. This is normally enforced
13762 by expanders, but we didn't bother to create one here. */
13763 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13764 op0 = copy_to_mode_reg (mode0, op0);
13766 pat = GEN_FCN (icode) (target, op0, op1);
13767 if (! pat)
13768 return 0;
13769 emit_insn (pat);
13770 return target;
13771 }
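/* Worked example (a hedged sketch of the control flow, not new
   behaviour): for user code such as

       #include <xmmintrin.h>
       __m128 sum (__m128 a, __m128 b) { return _mm_add_ps (a, b); }

   _mm_add_ps expands to __builtin_ia32_addps, ix86_expand_builtin finds
   the matching bdesc_2arg entry and calls this routine with
   icode == CODE_FOR_addv4sf3.  Operands failing the insn predicates are
   copied into registers, the addv4sf3 pattern is emitted, and the V4SF
   target register is returned as the value of the call.  */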
13773 /* Subroutine of ix86_expand_builtin to take care of stores. */
13775 static rtx
13776 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13777 {
13778 rtx pat;
13779 tree arg0 = TREE_VALUE (arglist);
13780 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13781 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13782 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13783 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13784 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13786 if (VECTOR_MODE_P (mode1))
13787 op1 = safe_vector_operand (op1, mode1);
13789 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13790 op1 = copy_to_mode_reg (mode1, op1);
13792 pat = GEN_FCN (icode) (op0, op1);
13793 if (pat)
13794 emit_insn (pat);
13795 return 0;
13796 }
13798 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13800 static rtx
13801 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13802 rtx target, int do_load)
13803 {
13804 rtx pat;
13805 tree arg0 = TREE_VALUE (arglist);
13806 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13807 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13808 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13810 if (! target
13811 || GET_MODE (target) != tmode
13812 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13813 target = gen_reg_rtx (tmode);
13814 if (do_load)
13815 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13816 else
13817 {
13818 if (VECTOR_MODE_P (mode0))
13819 op0 = safe_vector_operand (op0, mode0);
13821 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13822 op0 = copy_to_mode_reg (mode0, op0);
13823 }
13825 pat = GEN_FCN (icode) (target, op0);
13826 if (! pat)
13827 return 0;
13828 emit_insn (pat);
13829 return target;
13830 }
13832 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13833 sqrtss, rsqrtss, rcpss. */
13835 static rtx
13836 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13837 {
13838 rtx pat;
13839 tree arg0 = TREE_VALUE (arglist);
13840 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13841 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13842 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13844 if (! target
13845 || GET_MODE (target) != tmode
13846 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13847 target = gen_reg_rtx (tmode);
13849 if (VECTOR_MODE_P (mode0))
13850 op0 = safe_vector_operand (op0, mode0);
13852 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13853 op0 = copy_to_mode_reg (mode0, op0);
13855 op1 = op0;
13856 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13857 op1 = copy_to_mode_reg (mode0, op1);
13859 pat = GEN_FCN (icode) (target, op0, op1);
13860 if (! pat)
13861 return 0;
13862 emit_insn (pat);
13863 return target;
13864 }
13866 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13868 static rtx
13869 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13870 rtx target)
13871 {
13872 rtx pat;
13873 tree arg0 = TREE_VALUE (arglist);
13874 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13875 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13876 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13877 rtx op2;
13878 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13879 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13880 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13881 enum rtx_code comparison = d->comparison;
13883 if (VECTOR_MODE_P (mode0))
13884 op0 = safe_vector_operand (op0, mode0);
13885 if (VECTOR_MODE_P (mode1))
13886 op1 = safe_vector_operand (op1, mode1);
13888 /* Swap operands if we have a comparison that isn't available in
13889 hardware. */
13890 if (d->flag)
13891 {
13892 rtx tmp = gen_reg_rtx (mode1);
13893 emit_move_insn (tmp, op1);
13894 op1 = op0;
13895 op0 = tmp;
13896 }
13898 if (! target
13899 || GET_MODE (target) != tmode
13900 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13901 target = gen_reg_rtx (tmode);
13903 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13904 op0 = copy_to_mode_reg (mode0, op0);
13905 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13906 op1 = copy_to_mode_reg (mode1, op1);
13908 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13909 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13910 if (! pat)
13911 return 0;
13912 emit_insn (pat);
13913 return target;
13914 }
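/* Illustrative note (hedged sketch): the d->flag swap above is how the
   "greater" comparisons are synthesized from the "less" forms the
   hardware provides.  For example, user code such as

       __m128 m = _mm_cmpgt_ps (a, b);

   arrives here as __builtin_ia32_cmpgtps via a CODE_FOR_maskcmpv4sf3
   table entry whose flag is set, so op0 and op1 are exchanged and the
   emitted insn is effectively cmpltps with reversed operands.  */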
13916 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13918 static rtx
13919 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13920 rtx target)
13921 {
13922 rtx pat;
13923 tree arg0 = TREE_VALUE (arglist);
13924 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13925 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13926 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13927 rtx op2;
13928 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13929 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13930 enum rtx_code comparison = d->comparison;
13932 if (VECTOR_MODE_P (mode0))
13933 op0 = safe_vector_operand (op0, mode0);
13934 if (VECTOR_MODE_P (mode1))
13935 op1 = safe_vector_operand (op1, mode1);
13937 /* Swap operands if we have a comparison that isn't available in
13938 hardware. */
13939 if (d->flag)
13940 {
13941 rtx tmp = op1;
13942 op1 = op0;
13943 op0 = tmp;
13944 }
13946 target = gen_reg_rtx (SImode);
13947 emit_move_insn (target, const0_rtx);
13948 target = gen_rtx_SUBREG (QImode, target, 0);
13950 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13951 op0 = copy_to_mode_reg (mode0, op0);
13952 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13953 op1 = copy_to_mode_reg (mode1, op1);
13955 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13956 pat = GEN_FCN (d->icode) (op0, op1);
13957 if (! pat)
13958 return 0;
13959 emit_insn (pat);
13960 emit_insn (gen_rtx_SET (VOIDmode,
13961 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13962 gen_rtx_fmt_ee (comparison, QImode,
13963 SET_DEST (pat),
13964 const0_rtx)));
13966 return SUBREG_REG (target);
13967 }
13969 /* Expand an expression EXP that calls a built-in function,
13970 with result going to TARGET if that's convenient
13971 (and in mode MODE if that's convenient).
13972 SUBTARGET may be used as the target for computing one of EXP's operands.
13973 IGNORE is nonzero if the value is to be ignored. */
13975 rtx
13976 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13977 enum machine_mode mode ATTRIBUTE_UNUSED,
13978 int ignore ATTRIBUTE_UNUSED)
13979 {
13980 const struct builtin_description *d;
13981 size_t i;
13982 enum insn_code icode;
13983 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13984 tree arglist = TREE_OPERAND (exp, 1);
13985 tree arg0, arg1, arg2;
13986 rtx op0, op1, op2, pat;
13987 enum machine_mode tmode, mode0, mode1, mode2;
13988 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13990 switch (fcode)
13991 {
13992 case IX86_BUILTIN_EMMS:
13993 emit_insn (gen_emms ());
13994 return 0;
13996 case IX86_BUILTIN_SFENCE:
13997 emit_insn (gen_sfence ());
13998 return 0;
14000 case IX86_BUILTIN_PEXTRW:
14001 case IX86_BUILTIN_PEXTRW128:
14002 icode = (fcode == IX86_BUILTIN_PEXTRW
14003 ? CODE_FOR_mmx_pextrw
14004 : CODE_FOR_sse2_pextrw);
14005 arg0 = TREE_VALUE (arglist);
14006 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14007 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14008 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14009 tmode = insn_data[icode].operand[0].mode;
14010 mode0 = insn_data[icode].operand[1].mode;
14011 mode1 = insn_data[icode].operand[2].mode;
14013 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14014 op0 = copy_to_mode_reg (mode0, op0);
14015 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14016 {
14017 error ("selector must be an integer constant in the range 0..%i",
14018 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14019 return gen_reg_rtx (tmode);
14020 }
14021 if (target == 0
14022 || GET_MODE (target) != tmode
14023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14024 target = gen_reg_rtx (tmode);
14025 pat = GEN_FCN (icode) (target, op0, op1);
14026 if (! pat)
14027 return 0;
14028 emit_insn (pat);
14029 return target;
14031 case IX86_BUILTIN_PINSRW:
14032 case IX86_BUILTIN_PINSRW128:
14033 icode = (fcode == IX86_BUILTIN_PINSRW
14034 ? CODE_FOR_mmx_pinsrw
14035 : CODE_FOR_sse2_pinsrw);
14036 arg0 = TREE_VALUE (arglist);
14037 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14038 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14039 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14040 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14041 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14042 tmode = insn_data[icode].operand[0].mode;
14043 mode0 = insn_data[icode].operand[1].mode;
14044 mode1 = insn_data[icode].operand[2].mode;
14045 mode2 = insn_data[icode].operand[3].mode;
14047 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14048 op0 = copy_to_mode_reg (mode0, op0);
14049 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14050 op1 = copy_to_mode_reg (mode1, op1);
14051 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14052 {
14053 error ("selector must be an integer constant in the range 0..%i",
14054 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14055 return const0_rtx;
14056 }
14057 if (target == 0
14058 || GET_MODE (target) != tmode
14059 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14060 target = gen_reg_rtx (tmode);
14061 pat = GEN_FCN (icode) (target, op0, op1, op2);
14062 if (! pat)
14063 return 0;
14064 emit_insn (pat);
14065 return target;
14067 case IX86_BUILTIN_MASKMOVQ:
14068 case IX86_BUILTIN_MASKMOVDQU:
14069 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14070 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14071 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14072 : CODE_FOR_sse2_maskmovdqu));
14073 /* Note the arg order is different from the operand order. */
14074 arg1 = TREE_VALUE (arglist);
14075 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14076 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14077 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14078 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14079 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14080 mode0 = insn_data[icode].operand[0].mode;
14081 mode1 = insn_data[icode].operand[1].mode;
14082 mode2 = insn_data[icode].operand[2].mode;
14084 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14085 op0 = copy_to_mode_reg (mode0, op0);
14086 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14087 op1 = copy_to_mode_reg (mode1, op1);
14088 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14089 op2 = copy_to_mode_reg (mode2, op2);
14090 pat = GEN_FCN (icode) (op0, op1, op2);
14091 if (! pat)
14092 return 0;
14093 emit_insn (pat);
14094 return 0;
14096 case IX86_BUILTIN_SQRTSS:
14097 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14098 case IX86_BUILTIN_RSQRTSS:
14099 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14100 case IX86_BUILTIN_RCPSS:
14101 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14103 case IX86_BUILTIN_LOADAPS:
14104 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14106 case IX86_BUILTIN_LOADUPS:
14107 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14109 case IX86_BUILTIN_STOREAPS:
14110 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14112 case IX86_BUILTIN_STOREUPS:
14113 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14115 case IX86_BUILTIN_LOADSS:
14116 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14118 case IX86_BUILTIN_STORESS:
14119 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14121 case IX86_BUILTIN_LOADHPS:
14122 case IX86_BUILTIN_LOADLPS:
14123 case IX86_BUILTIN_LOADHPD:
14124 case IX86_BUILTIN_LOADLPD:
14125 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14126 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14127 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14128 : CODE_FOR_sse2_movsd);
14129 arg0 = TREE_VALUE (arglist);
14130 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14131 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14132 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14133 tmode = insn_data[icode].operand[0].mode;
14134 mode0 = insn_data[icode].operand[1].mode;
14135 mode1 = insn_data[icode].operand[2].mode;
14137 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14138 op0 = copy_to_mode_reg (mode0, op0);
14139 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14140 if (target == 0
14141 || GET_MODE (target) != tmode
14142 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14143 target = gen_reg_rtx (tmode);
14144 pat = GEN_FCN (icode) (target, op0, op1);
14145 if (! pat)
14146 return 0;
14147 emit_insn (pat);
14148 return target;
14150 case IX86_BUILTIN_STOREHPS:
14151 case IX86_BUILTIN_STORELPS:
14152 case IX86_BUILTIN_STOREHPD:
14153 case IX86_BUILTIN_STORELPD:
14154 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14155 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14156 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14157 : CODE_FOR_sse2_movsd);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14162 mode0 = insn_data[icode].operand[1].mode;
14163 mode1 = insn_data[icode].operand[2].mode;
14165 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14166 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14167 op1 = copy_to_mode_reg (mode1, op1);
14169 pat = GEN_FCN (icode) (op0, op0, op1);
14170 if (! pat)
14171 return 0;
14172 emit_insn (pat);
14173 return 0;
14175 case IX86_BUILTIN_MOVNTPS:
14176 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14177 case IX86_BUILTIN_MOVNTQ:
14178 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14180 case IX86_BUILTIN_LDMXCSR:
14181 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14182 target = assign_386_stack_local (SImode, 0);
14183 emit_move_insn (target, op0);
14184 emit_insn (gen_ldmxcsr (target));
14185 return 0;
14187 case IX86_BUILTIN_STMXCSR:
14188 target = assign_386_stack_local (SImode, 0);
14189 emit_insn (gen_stmxcsr (target));
14190 return copy_to_mode_reg (SImode, target);
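/* Usage sketch (illustrative): _mm_getcsr and _mm_setcsr from
   <xmmintrin.h> are thin wrappers around these two builtins, e.g.

       #include <xmmintrin.h>
       void enable_flush_to_zero (void)
       {
         _mm_setcsr (_mm_getcsr () | 0x8000);
       }

   Both cases go through a 4-byte stack slot (assign_386_stack_local)
   because the ldmxcsr and stmxcsr instructions only accept memory
   operands.  */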
14192 case IX86_BUILTIN_SHUFPS:
14193 case IX86_BUILTIN_SHUFPD:
14194 icode = (fcode == IX86_BUILTIN_SHUFPS
14195 ? CODE_FOR_sse_shufps
14196 : CODE_FOR_sse2_shufpd);
14197 arg0 = TREE_VALUE (arglist);
14198 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14199 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14200 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14201 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14202 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14203 tmode = insn_data[icode].operand[0].mode;
14204 mode0 = insn_data[icode].operand[1].mode;
14205 mode1 = insn_data[icode].operand[2].mode;
14206 mode2 = insn_data[icode].operand[3].mode;
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14209 op0 = copy_to_mode_reg (mode0, op0);
14210 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14211 op1 = copy_to_mode_reg (mode1, op1);
14212 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14213 {
14214 /* @@@ better error message */
14215 error ("mask must be an immediate");
14216 return gen_reg_rtx (tmode);
14217 }
14218 if (target == 0
14219 || GET_MODE (target) != tmode
14220 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14221 target = gen_reg_rtx (tmode);
14222 pat = GEN_FCN (icode) (target, op0, op1, op2);
14223 if (! pat)
14224 return 0;
14225 emit_insn (pat);
14226 return target;
14228 case IX86_BUILTIN_PSHUFW:
14229 case IX86_BUILTIN_PSHUFD:
14230 case IX86_BUILTIN_PSHUFHW:
14231 case IX86_BUILTIN_PSHUFLW:
14232 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14233 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14234 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14235 : CODE_FOR_mmx_pshufw);
14236 arg0 = TREE_VALUE (arglist);
14237 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14238 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14239 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14240 tmode = insn_data[icode].operand[0].mode;
14241 mode1 = insn_data[icode].operand[1].mode;
14242 mode2 = insn_data[icode].operand[2].mode;
14244 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14245 op0 = copy_to_mode_reg (mode1, op0);
14246 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14247 {
14248 /* @@@ better error message */
14249 error ("mask must be an immediate");
14250 return const0_rtx;
14251 }
14252 if (target == 0
14253 || GET_MODE (target) != tmode
14254 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14255 target = gen_reg_rtx (tmode);
14256 pat = GEN_FCN (icode) (target, op0, op1);
14257 if (! pat)
14258 return 0;
14259 emit_insn (pat);
14260 return target;
14262 case IX86_BUILTIN_PSLLDQI128:
14263 case IX86_BUILTIN_PSRLDQI128:
14264 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14265 : CODE_FOR_sse2_lshrti3);
14266 arg0 = TREE_VALUE (arglist);
14267 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14268 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14269 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14270 tmode = insn_data[icode].operand[0].mode;
14271 mode1 = insn_data[icode].operand[1].mode;
14272 mode2 = insn_data[icode].operand[2].mode;
14274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14275 {
14276 op0 = copy_to_reg (op0);
14277 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14278 }
14279 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14280 {
14281 error ("shift must be an immediate");
14282 return const0_rtx;
14283 }
14284 target = gen_reg_rtx (V2DImode);
14285 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14286 if (! pat)
14287 return 0;
14288 emit_insn (pat);
14289 return target;
14291 case IX86_BUILTIN_FEMMS:
14292 emit_insn (gen_femms ());
14293 return NULL_RTX;
14295 case IX86_BUILTIN_PAVGUSB:
14296 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14298 case IX86_BUILTIN_PF2ID:
14299 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14301 case IX86_BUILTIN_PFACC:
14302 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14304 case IX86_BUILTIN_PFADD:
14305 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14307 case IX86_BUILTIN_PFCMPEQ:
14308 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14310 case IX86_BUILTIN_PFCMPGE:
14311 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14313 case IX86_BUILTIN_PFCMPGT:
14314 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14316 case IX86_BUILTIN_PFMAX:
14317 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14319 case IX86_BUILTIN_PFMIN:
14320 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14322 case IX86_BUILTIN_PFMUL:
14323 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14325 case IX86_BUILTIN_PFRCP:
14326 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14328 case IX86_BUILTIN_PFRCPIT1:
14329 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14331 case IX86_BUILTIN_PFRCPIT2:
14332 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14334 case IX86_BUILTIN_PFRSQIT1:
14335 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14337 case IX86_BUILTIN_PFRSQRT:
14338 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14340 case IX86_BUILTIN_PFSUB:
14341 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14343 case IX86_BUILTIN_PFSUBR:
14344 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14346 case IX86_BUILTIN_PI2FD:
14347 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14349 case IX86_BUILTIN_PMULHRW:
14350 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14352 case IX86_BUILTIN_PF2IW:
14353 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14355 case IX86_BUILTIN_PFNACC:
14356 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14358 case IX86_BUILTIN_PFPNACC:
14359 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14361 case IX86_BUILTIN_PI2FW:
14362 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14364 case IX86_BUILTIN_PSWAPDSI:
14365 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14367 case IX86_BUILTIN_PSWAPDSF:
14368 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14370 case IX86_BUILTIN_SSE_ZERO:
14371 target = gen_reg_rtx (V4SFmode);
14372 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14373 return target;
14375 case IX86_BUILTIN_MMX_ZERO:
14376 target = gen_reg_rtx (DImode);
14377 emit_insn (gen_mmx_clrdi (target));
14378 return target;
14380 case IX86_BUILTIN_CLRTI:
14381 target = gen_reg_rtx (V2DImode);
14382 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14383 return target;
14386 case IX86_BUILTIN_SQRTSD:
14387 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14388 case IX86_BUILTIN_LOADAPD:
14389 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14390 case IX86_BUILTIN_LOADUPD:
14391 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14393 case IX86_BUILTIN_STOREAPD:
14394 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14395 case IX86_BUILTIN_STOREUPD:
14396 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14398 case IX86_BUILTIN_LOADSD:
14399 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14401 case IX86_BUILTIN_STORESD:
14402 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14404 case IX86_BUILTIN_SETPD1:
14405 target = assign_386_stack_local (DFmode, 0);
14406 arg0 = TREE_VALUE (arglist);
14407 emit_move_insn (adjust_address (target, DFmode, 0),
14408 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14409 op0 = gen_reg_rtx (V2DFmode);
14410 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14411 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14412 return op0;
14414 case IX86_BUILTIN_SETPD:
14415 target = assign_386_stack_local (V2DFmode, 0);
14416 arg0 = TREE_VALUE (arglist);
14417 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14418 emit_move_insn (adjust_address (target, DFmode, 0),
14419 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14420 emit_move_insn (adjust_address (target, DFmode, 8),
14421 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14422 op0 = gen_reg_rtx (V2DFmode);
14423 emit_insn (gen_sse2_movapd (op0, target));
14424 return op0;
14426 case IX86_BUILTIN_LOADRPD:
14427 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14428 gen_reg_rtx (V2DFmode), 1);
14429 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14430 return target;
14432 case IX86_BUILTIN_LOADPD1:
14433 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14434 gen_reg_rtx (V2DFmode), 1);
14435 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14436 return target;
14438 case IX86_BUILTIN_STOREPD1:
14439 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14440 case IX86_BUILTIN_STORERPD:
14441 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14443 case IX86_BUILTIN_CLRPD:
14444 target = gen_reg_rtx (V2DFmode);
14445 emit_insn (gen_sse_clrv2df (target));
14446 return target;
14448 case IX86_BUILTIN_MFENCE:
14449 emit_insn (gen_sse2_mfence ());
14450 return 0;
14451 case IX86_BUILTIN_LFENCE:
14452 emit_insn (gen_sse2_lfence ());
14453 return 0;
14455 case IX86_BUILTIN_CLFLUSH:
14456 arg0 = TREE_VALUE (arglist);
14457 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14458 icode = CODE_FOR_sse2_clflush;
14459 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14460 op0 = copy_to_mode_reg (Pmode, op0);
14462 emit_insn (gen_sse2_clflush (op0));
14463 return 0;
14465 case IX86_BUILTIN_MOVNTPD:
14466 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14467 case IX86_BUILTIN_MOVNTDQ:
14468 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14469 case IX86_BUILTIN_MOVNTI:
14470 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14472 case IX86_BUILTIN_LOADDQA:
14473 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14474 case IX86_BUILTIN_LOADDQU:
14475 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14476 case IX86_BUILTIN_LOADD:
14477 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14479 case IX86_BUILTIN_STOREDQA:
14480 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14481 case IX86_BUILTIN_STOREDQU:
14482 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14483 case IX86_BUILTIN_STORED:
14484 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
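/* The monitor and mwait patterns want register operands, so force each
   argument into a register if it is not in one already.  */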
14486 case IX86_BUILTIN_MONITOR:
14487 arg0 = TREE_VALUE (arglist);
14488 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14489 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14490 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14491 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14492 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14493 if (!REG_P (op0))
14494 op0 = copy_to_mode_reg (SImode, op0);
14495 if (!REG_P (op1))
14496 op1 = copy_to_mode_reg (SImode, op1);
14497 if (!REG_P (op2))
14498 op2 = copy_to_mode_reg (SImode, op2);
14499 emit_insn (gen_monitor (op0, op1, op2));
14500 return 0;
14502 case IX86_BUILTIN_MWAIT:
14503 arg0 = TREE_VALUE (arglist);
14504 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14505 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14506 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14507 if (!REG_P (op0))
14508 op0 = copy_to_mode_reg (SImode, op0);
14509 if (!REG_P (op1))
14510 op1 = copy_to_mode_reg (SImode, op1);
14511 emit_insn (gen_mwait (op0, op1));
14512 return 0;
14514 case IX86_BUILTIN_LOADDDUP:
14515 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14517 case IX86_BUILTIN_LDDQU:
14518 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14521 default:
14522 break;
14525 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14526 if (d->code == fcode)
14528 /* Compares are treated specially. */
14529 if (d->icode == CODE_FOR_maskcmpv4sf3
14530 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14531 || d->icode == CODE_FOR_maskncmpv4sf3
14532 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14533 || d->icode == CODE_FOR_maskcmpv2df3
14534 || d->icode == CODE_FOR_vmmaskcmpv2df3
14535 || d->icode == CODE_FOR_maskncmpv2df3
14536 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14537 return ix86_expand_sse_compare (d, arglist, target);
14539 return ix86_expand_binop_builtin (d->icode, arglist, target);
14542 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14543 if (d->code == fcode)
14544 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14546 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14547 if (d->code == fcode)
14548 return ix86_expand_sse_comi (d, arglist, target);
14550 /* @@@ Should really do something sensible here. */
14551 return 0;
14554 /* Store OPERAND to the memory after reload is completed. This means
14555 that we can't easily use assign_stack_local. */
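/* Three cases below: with a red zone the slot just below the stack pointer
   can be used directly; on 64-bit without a red zone the value is pushed as
   a DImode (HImode and SImode are first converted to DImode); on 32-bit the
   value is pushed, DImode being split into two SImode pushes.  The caller
   later releases the slot with ix86_free_from_memory.  */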
14557 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14559 rtx result;
14560 if (!reload_completed)
14561 abort ();
14562 if (TARGET_RED_ZONE)
14564 result = gen_rtx_MEM (mode,
14565 gen_rtx_PLUS (Pmode,
14566 stack_pointer_rtx,
14567 GEN_INT (-RED_ZONE_SIZE)));
14568 emit_move_insn (result, operand);
14570 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14572 switch (mode)
14574 case HImode:
14575 case SImode:
14576 operand = gen_lowpart (DImode, operand);
14577 /* FALLTHRU */
14578 case DImode:
14579 emit_insn (
14580 gen_rtx_SET (VOIDmode,
14581 gen_rtx_MEM (DImode,
14582 gen_rtx_PRE_DEC (DImode,
14583 stack_pointer_rtx)),
14584 operand));
14585 break;
14586 default:
14587 abort ();
14589 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14591 else
14593 switch (mode)
14595 case DImode:
14597 rtx operands[2];
14598 split_di (&operand, 1, operands, operands + 1);
14599 emit_insn (
14600 gen_rtx_SET (VOIDmode,
14601 gen_rtx_MEM (SImode,
14602 gen_rtx_PRE_DEC (Pmode,
14603 stack_pointer_rtx)),
14604 operands[1]));
14605 emit_insn (
14606 gen_rtx_SET (VOIDmode,
14607 gen_rtx_MEM (SImode,
14608 gen_rtx_PRE_DEC (Pmode,
14609 stack_pointer_rtx)),
14610 operands[0]));
14612 break;
14613 case HImode:
14614 /* It is better to store HImodes as SImodes. */
14615 if (!TARGET_PARTIAL_REG_STALL)
14616 operand = gen_lowpart (SImode, operand);
14617 /* FALLTHRU */
14618 case SImode:
14619 emit_insn (
14620 gen_rtx_SET (VOIDmode,
14621 gen_rtx_MEM (GET_MODE (operand),
14622 gen_rtx_PRE_DEC (SImode,
14623 stack_pointer_rtx)),
14624 operand));
14625 break;
14626 default:
14627 abort ();
14629 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14631 return result;
14634 /* Free operand from the memory. */
14635 void
14636 ix86_free_from_memory (enum machine_mode mode)
14638 if (!TARGET_RED_ZONE)
14640 int size;
14642 if (mode == DImode || TARGET_64BIT)
14643 size = 8;
14644 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14645 size = 2;
14646 else
14647 size = 4;
14648 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14649 to a pop or add instruction if registers are available. */
14650 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14651 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14652 GEN_INT (size))));
14656 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14657 QImode must go into class Q_REGS.
14658 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
14659 movdf to do mem-to-mem moves through integer regs. */
14660 enum reg_class
14661 ix86_preferred_reload_class (rtx x, enum reg_class class)
14663 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14664 return NO_REGS;
14665 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14667 /* SSE can't load any constant directly yet. */
14668 if (SSE_CLASS_P (class))
14669 return NO_REGS;
14670 /* Floats can load 0 and 1. */
14671 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14673 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14674 if (MAYBE_SSE_CLASS_P (class))
14675 return (reg_class_subset_p (class, GENERAL_REGS)
14676 ? GENERAL_REGS : FLOAT_REGS);
14677 else
14678 return class;
14680 /* General regs can load everything. */
14681 if (reg_class_subset_p (class, GENERAL_REGS))
14682 return GENERAL_REGS;
14683 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14684 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14685 return NO_REGS;
14687 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14688 return NO_REGS;
14689 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14690 return Q_REGS;
14691 return class;
14694 /* If we are copying between general and FP registers, we need a memory
14695 location. The same is true for SSE and MMX registers.
14697 The macro can't work reliably when one of the CLASSES is a class containing
14698 registers from multiple units (SSE, MMX, integer). We avoid this by never
14699 combining those units in a single alternative in the machine description.
14700 Ensure that this constraint holds to avoid unexpected surprises.
14702 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14703 enforce these sanity checks. */
14705 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14706 enum machine_mode mode, int strict)
14708 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14709 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14710 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14711 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14712 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14713 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14715 if (strict)
14716 abort ();
14717 else
14718 return 1;
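/* Memory is needed when moving between the x87 and non-x87 register files,
   and when moving between the SSE or MMX units and the other units unless
   the mode is SImode (or DImode on 64-bit) and either direct inter-unit
   moves are enabled or we are optimizing for size.  */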
14720 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14721 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14722 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14723 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14724 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14726 /* Return the cost of moving data from a register in class CLASS1 to
14727 one in class CLASS2.
14729 It is not required that the cost always equal 2 when FROM is the same as TO;
14730 on some machines it is expensive to move between registers if they are not
14731 general registers. */
14733 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14734 enum reg_class class2)
14736 /* In case we require secondary memory, compute cost of the store followed
14737 by load. In order to avoid bad register allocation choices, we need
14738 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14740 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14742 int cost = 1;
14744 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14745 MEMORY_MOVE_COST (mode, class1, 1));
14746 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14747 MEMORY_MOVE_COST (mode, class2, 1));
14749 /* When copying from a general purpose register we may emit multiple
14750 stores followed by a single load, causing a memory size mismatch stall.
14751 Count this as an arbitrarily high cost of 20. */
14752 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14753 cost += 20;
14755 /* In the case of FP/MMX moves, the registers actually overlap, and we
14756 have to switch modes in order to treat them differently. */
14757 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14758 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14759 cost += 20;
14761 return cost;
14764 /* Moves between SSE/MMX and integer unit are expensive. */
14765 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14766 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14767 return ix86_cost->mmxsse_to_integer;
14768 if (MAYBE_FLOAT_CLASS_P (class1))
14769 return ix86_cost->fp_move;
14770 if (MAYBE_SSE_CLASS_P (class1))
14771 return ix86_cost->sse_move;
14772 if (MAYBE_MMX_CLASS_P (class1))
14773 return ix86_cost->mmx_move;
14774 return 2;
14777 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14779 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14781 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
14782 if (CC_REGNO_P (regno))
14783 return GET_MODE_CLASS (mode) == MODE_CC;
14784 if (GET_MODE_CLASS (mode) == MODE_CC
14785 || GET_MODE_CLASS (mode) == MODE_RANDOM
14786 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14787 return 0;
14788 if (FP_REGNO_P (regno))
14789 return VALID_FP_MODE_P (mode);
14790 if (SSE_REGNO_P (regno))
14791 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14792 if (MMX_REGNO_P (regno))
14793 return (TARGET_MMX
14794 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14795 /* We handle both integers and floats in the general purpose registers.
14796 In the future we should be able to handle vector modes as well. */
14797 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14798 return 0;
14799 /* Take care for QImode values - they can be in non-QI regs, but then
14800 they do cause partial register stalls. */
14801 if (regno < 4 || mode != QImode || TARGET_64BIT)
14802 return 1;
14803 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14806 /* Return the cost of moving data of mode M between a
14807 register and memory. A value of 2 is the default; this cost is
14808 relative to those in `REGISTER_MOVE_COST'.
14810 If moving between registers and memory is more expensive than
14811 between two registers, you should define this macro to express the
14812 relative cost.
14814 Also model the increased cost of moving QImode registers in
14815 non-Q_REGS classes. */
14818 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14820 if (FLOAT_CLASS_P (class))
14822 int index;
14823 switch (mode)
14825 case SFmode:
14826 index = 0;
14827 break;
14828 case DFmode:
14829 index = 1;
14830 break;
14831 case XFmode:
14832 index = 2;
14833 break;
14834 default:
14835 return 100;
14837 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14839 if (SSE_CLASS_P (class))
14841 int index;
14842 switch (GET_MODE_SIZE (mode))
14844 case 4:
14845 index = 0;
14846 break;
14847 case 8:
14848 index = 1;
14849 break;
14850 case 16:
14851 index = 2;
14852 break;
14853 default:
14854 return 100;
14856 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14858 if (MMX_CLASS_P (class))
14860 int index;
14861 switch (GET_MODE_SIZE (mode))
14863 case 4:
14864 index = 0;
14865 break;
14866 case 8:
14867 index = 1;
14868 break;
14869 default:
14870 return 100;
14872 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14874 switch (GET_MODE_SIZE (mode))
14876 case 1:
14877 if (in)
14878 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14879 : ix86_cost->movzbl_load);
14880 else
14881 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14882 : ix86_cost->int_store[0] + 4);
14883 break;
14884 case 2:
14885 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14886 default:
14887 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14888 if (mode == TFmode)
14889 mode = XFmode;
14890 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14891 * (((int) GET_MODE_SIZE (mode)
14892 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14896 /* Compute a (partial) cost for rtx X. Return true if the complete
14897 cost has been computed, and false if subexpressions should be
14898 scanned. In either case, *TOTAL contains the cost result. */
14900 static bool
14901 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14903 enum machine_mode mode = GET_MODE (x);
14905 switch (code)
14907 case CONST_INT:
14908 case CONST:
14909 case LABEL_REF:
14910 case SYMBOL_REF:
14911 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14912 *total = 3;
14913 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14914 *total = 2;
14915 else if (flag_pic && SYMBOLIC_CONST (x)
14916 && (!TARGET_64BIT
14917 || (GET_CODE (x) != LABEL_REF
14918 && (GET_CODE (x) != SYMBOL_REF
14919 || !SYMBOL_REF_LOCAL_P (x)))))
14920 *total = 1;
14921 else
14922 *total = 0;
14923 return true;
14925 case CONST_DOUBLE:
14926 if (mode == VOIDmode)
14927 *total = 0;
14928 else
14929 switch (standard_80387_constant_p (x))
14931 case 1: /* 0.0 */
14932 *total = 1;
14933 break;
14934 default: /* Other constants */
14935 *total = 2;
14936 break;
14937 case 0:
14938 case -1:
14939 /* Start with (MEM (SYMBOL_REF)), since that's where
14940 it'll probably end up. Add a penalty for size. */
14941 *total = (COSTS_N_INSNS (1)
14942 + (flag_pic != 0 && !TARGET_64BIT)
14943 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14944 break;
14946 return true;
14948 case ZERO_EXTEND:
14949 /* The zero extension is often completely free on x86_64, so make
14950 it as cheap as possible. */
14951 if (TARGET_64BIT && mode == DImode
14952 && GET_MODE (XEXP (x, 0)) == SImode)
14953 *total = 1;
14954 else if (TARGET_ZERO_EXTEND_WITH_AND)
14955 *total = COSTS_N_INSNS (ix86_cost->add);
14956 else
14957 *total = COSTS_N_INSNS (ix86_cost->movzx);
14958 return false;
14960 case SIGN_EXTEND:
14961 *total = COSTS_N_INSNS (ix86_cost->movsx);
14962 return false;
14964 case ASHIFT:
14965 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14966 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14968 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14969 if (value == 1)
14971 *total = COSTS_N_INSNS (ix86_cost->add);
14972 return false;
14974 if ((value == 2 || value == 3)
14975 && ix86_cost->lea <= ix86_cost->shift_const)
14977 *total = COSTS_N_INSNS (ix86_cost->lea);
14978 return false;
14981 /* FALLTHRU */
14983 case ROTATE:
14984 case ASHIFTRT:
14985 case LSHIFTRT:
14986 case ROTATERT:
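/* A DImode shift on a 32-bit target is split into several SImode
   instructions: roughly two shifts for a constant count (a shift plus a
   couple of moves when the count exceeds 32), and a much longer sequence
   for a variable count unless the count is already masked by an AND.  */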
14987 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14989 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14991 if (INTVAL (XEXP (x, 1)) > 32)
14992 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
14993 else
14994 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
14996 else
14998 if (GET_CODE (XEXP (x, 1)) == AND)
14999 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15000 else
15001 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15004 else
15006 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15007 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15008 else
15009 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15011 return false;
15013 case MULT:
15014 if (FLOAT_MODE_P (mode))
15016 *total = COSTS_N_INSNS (ix86_cost->fmul);
15017 return false;
15019 else
15021 rtx op0 = XEXP (x, 0);
15022 rtx op1 = XEXP (x, 1);
15023 int nbits;
15024 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15026 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
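/* Count the bits set in the constant multiplier; each set bit contributes
   mult_bit to the cost below.  "value &= value - 1" clears the lowest set
   bit on every iteration.  */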
15027 for (nbits = 0; value != 0; value &= value - 1)
15028 nbits++;
15030 else
15031 /* This is arbitrary. */
15032 nbits = 7;
15034 /* Compute costs correctly for widening multiplication. */
15035 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15036 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15037 == GET_MODE_SIZE (mode))
15039 int is_mulwiden = 0;
15040 enum machine_mode inner_mode = GET_MODE (XEXP (op0, 0));
15042 if (GET_CODE (op0) == GET_CODE (op1))
15043 is_mulwiden = 1, op1 = XEXP (op1, 0);
15044 else if (GET_CODE (op1) == CONST_INT)
15046 if (GET_CODE (op0) == SIGN_EXTEND)
15047 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15048 == INTVAL (op1);
15049 else
15050 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15053 if (is_mulwiden)
15054 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15057 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15058 + nbits * ix86_cost->mult_bit)
15059 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15061 return true;
15064 case DIV:
15065 case UDIV:
15066 case MOD:
15067 case UMOD:
15068 if (FLOAT_MODE_P (mode))
15069 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15070 else
15071 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15072 return false;
15074 case PLUS:
15075 if (FLOAT_MODE_P (mode))
15076 *total = COSTS_N_INSNS (ix86_cost->fadd);
15077 else if (GET_MODE_CLASS (mode) == MODE_INT
15078 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
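/* Additions that match the base + index*scale + displacement shape of an
   effective address can be carried out by a single lea; cost the
   sub-operands separately and charge a single lea for the addition itself.  */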
15080 if (GET_CODE (XEXP (x, 0)) == PLUS
15081 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15082 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15083 && CONSTANT_P (XEXP (x, 1)))
15085 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15086 if (val == 2 || val == 4 || val == 8)
15088 *total = COSTS_N_INSNS (ix86_cost->lea);
15089 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15090 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15091 outer_code);
15092 *total += rtx_cost (XEXP (x, 1), outer_code);
15093 return true;
15096 else if (GET_CODE (XEXP (x, 0)) == MULT
15097 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15099 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15100 if (val == 2 || val == 4 || val == 8)
15102 *total = COSTS_N_INSNS (ix86_cost->lea);
15103 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15104 *total += rtx_cost (XEXP (x, 1), outer_code);
15105 return true;
15108 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15110 *total = COSTS_N_INSNS (ix86_cost->lea);
15111 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15112 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15113 *total += rtx_cost (XEXP (x, 1), outer_code);
15114 return true;
15117 /* FALLTHRU */
15119 case MINUS:
15120 if (FLOAT_MODE_P (mode))
15122 *total = COSTS_N_INSNS (ix86_cost->fadd);
15123 return false;
15125 /* FALLTHRU */
15127 case AND:
15128 case IOR:
15129 case XOR:
15130 if (!TARGET_64BIT && mode == DImode)
15132 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15133 + (rtx_cost (XEXP (x, 0), outer_code)
15134 << (GET_MODE (XEXP (x, 0)) != DImode))
15135 + (rtx_cost (XEXP (x, 1), outer_code)
15136 << (GET_MODE (XEXP (x, 1)) != DImode)));
15137 return true;
15139 /* FALLTHRU */
15141 case NEG:
15142 if (FLOAT_MODE_P (mode))
15144 *total = COSTS_N_INSNS (ix86_cost->fchs);
15145 return false;
15147 /* FALLTHRU */
15149 case NOT:
15150 if (!TARGET_64BIT && mode == DImode)
15151 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15152 else
15153 *total = COSTS_N_INSNS (ix86_cost->add);
15154 return false;
15156 case FLOAT_EXTEND:
15157 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15158 *total = 0;
15159 return false;
15161 case ABS:
15162 if (FLOAT_MODE_P (mode))
15163 *total = COSTS_N_INSNS (ix86_cost->fabs);
15164 return false;
15166 case SQRT:
15167 if (FLOAT_MODE_P (mode))
15168 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15169 return false;
15171 case UNSPEC:
15172 if (XINT (x, 1) == UNSPEC_TP)
15173 *total = 0;
15174 return false;
15176 default:
15177 return false;
15181 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15182 static void
15183 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15185 init_section ();
15186 fputs ("\tpushl $", asm_out_file);
15187 assemble_name (asm_out_file, XSTR (symbol, 0));
15188 fputc ('\n', asm_out_file);
15190 #endif
15192 #if TARGET_MACHO
15194 static int current_machopic_label_num;
15196 /* Given a symbol name and its associated stub, write out the
15197 definition of the stub. */
15199 void
15200 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15202 unsigned int length;
15203 char *binder_name, *symbol_name, lazy_ptr_name[32];
15204 int label = ++current_machopic_label_num;
15206 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15207 symb = (*targetm.strip_name_encoding) (symb);
15209 length = strlen (stub);
15210 binder_name = alloca (length + 32);
15211 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15213 length = strlen (symb);
15214 symbol_name = alloca (length + 32);
15215 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15217 sprintf (lazy_ptr_name, "L%d$lz", label);
15219 if (MACHOPIC_PURE)
15220 machopic_picsymbol_stub_section ();
15221 else
15222 machopic_symbol_stub_section ();
15224 fprintf (file, "%s:\n", stub);
15225 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15227 if (MACHOPIC_PURE)
15229 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15230 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15231 fprintf (file, "\tjmp %%edx\n");
15233 else
15234 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15236 fprintf (file, "%s:\n", binder_name);
15238 if (MACHOPIC_PURE)
15240 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15241 fprintf (file, "\tpushl %%eax\n");
15243 else
15244 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15246 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15248 machopic_lazy_symbol_ptr_section ();
15249 fprintf (file, "%s:\n", lazy_ptr_name);
15250 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15251 fprintf (file, "\t.long %s\n", binder_name);
15253 #endif /* TARGET_MACHO */
15255 /* Order the registers for register allocator. */
15257 void
15258 x86_order_regs_for_local_alloc (void)
15260 int pos = 0;
15261 int i;
15263 /* First allocate the local general purpose registers. */
15264 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15265 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15266 reg_alloc_order [pos++] = i;
15268 /* Global general purpose registers. */
15269 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15270 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15271 reg_alloc_order [pos++] = i;
15273 /* x87 registers come first in case we are doing FP math
15274 using them. */
15275 if (!TARGET_SSE_MATH)
15276 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15277 reg_alloc_order [pos++] = i;
15279 /* SSE registers. */
15280 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15281 reg_alloc_order [pos++] = i;
15282 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15283 reg_alloc_order [pos++] = i;
15285 /* x87 registers. */
15286 if (TARGET_SSE_MATH)
15287 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15288 reg_alloc_order [pos++] = i;
15290 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15291 reg_alloc_order [pos++] = i;
15293 /* Initialize the rest of the array, as we do not allocate some registers
15294 at all. */
15295 while (pos < FIRST_PSEUDO_REGISTER)
15296 reg_alloc_order [pos++] = 0;
15299 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15300 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15301 #endif
15303 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15304 struct attribute_spec.handler. */
15305 static tree
15306 ix86_handle_struct_attribute (tree *node, tree name,
15307 tree args ATTRIBUTE_UNUSED,
15308 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15310 tree *type = NULL;
15311 if (DECL_P (*node))
15313 if (TREE_CODE (*node) == TYPE_DECL)
15314 type = &TREE_TYPE (*node);
15316 else
15317 type = node;
15319 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15320 || TREE_CODE (*type) == UNION_TYPE)))
15322 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15323 *no_add_attrs = true;
15326 else if ((is_attribute_p ("ms_struct", name)
15327 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15328 || ((is_attribute_p ("gcc_struct", name)
15329 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15331 warning ("`%s' incompatible attribute ignored",
15332 IDENTIFIER_POINTER (name));
15333 *no_add_attrs = true;
15336 return NULL_TREE;
15339 static bool
15340 ix86_ms_bitfield_layout_p (tree record_type)
15342 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15343 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15344 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15347 /* Returns an expression indicating where the this parameter is
15348 located on entry to the FUNCTION. */
15350 static rtx
15351 x86_this_parameter (tree function)
15353 tree type = TREE_TYPE (function);
15355 if (TARGET_64BIT)
15357 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15358 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15361 if (ix86_function_regparm (type, function) > 0)
15363 tree parm;
15365 parm = TYPE_ARG_TYPES (type);
15366 /* Figure out whether or not the function has a variable number of
15367 arguments. */
15368 for (; parm; parm = TREE_CHAIN (parm))
15369 if (TREE_VALUE (parm) == void_type_node)
15370 break;
15371 /* If not, the this parameter is in the first argument. */
15372 if (parm)
15374 int regno = 0;
15375 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15376 regno = 2;
15377 return gen_rtx_REG (SImode, regno);
15381 if (aggregate_value_p (TREE_TYPE (type), type))
15382 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15383 else
15384 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15387 /* Determine whether x86_output_mi_thunk can succeed. */
15389 static bool
15390 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15391 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15392 HOST_WIDE_INT vcall_offset, tree function)
15394 /* 64-bit can handle anything. */
15395 if (TARGET_64BIT)
15396 return true;
15398 /* For 32-bit, everything's fine if we have one free register. */
15399 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15400 return true;
15402 /* Need a free register for vcall_offset. */
15403 if (vcall_offset)
15404 return false;
15406 /* Need a free register for GOT references. */
15407 if (flag_pic && !(*targetm.binds_local_p) (function))
15408 return false;
15410 /* Otherwise ok. */
15411 return true;
15414 /* Output the assembler code for a thunk function. THUNK_DECL is the
15415 declaration for the thunk function itself, FUNCTION is the decl for
15416 the target function. DELTA is an immediate constant offset to be
15417 added to THIS. If VCALL_OFFSET is nonzero, the word at
15418 *(*this + vcall_offset) should be added to THIS. */
15420 static void
15421 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15422 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15423 HOST_WIDE_INT vcall_offset, tree function)
15425 rtx xops[3];
15426 rtx this = x86_this_parameter (function);
15427 rtx this_reg, tmp;
15429 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15430 pull it in now and let DELTA benefit. */
15431 if (REG_P (this))
15432 this_reg = this;
15433 else if (vcall_offset)
15435 /* Put the this parameter into %eax. */
15436 xops[0] = this;
15437 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15438 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15440 else
15441 this_reg = NULL_RTX;
15443 /* Adjust the this parameter by a fixed constant. */
15444 if (delta)
15446 xops[0] = GEN_INT (delta);
15447 xops[1] = this_reg ? this_reg : this;
15448 if (TARGET_64BIT)
15450 if (!x86_64_general_operand (xops[0], DImode))
15452 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15453 xops[1] = tmp;
15454 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15455 xops[0] = tmp;
15456 xops[1] = this;
15458 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15460 else
15461 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15464 /* Adjust the this parameter by a value stored in the vtable. */
15465 if (vcall_offset)
15467 if (TARGET_64BIT)
15468 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15469 else
15471 int tmp_regno = 2 /* ECX */;
15472 if (lookup_attribute ("fastcall",
15473 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15474 tmp_regno = 0 /* EAX */;
15475 tmp = gen_rtx_REG (SImode, tmp_regno);
15478 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15479 xops[1] = tmp;
15480 if (TARGET_64BIT)
15481 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15482 else
15483 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15485 /* Adjust the this parameter. */
15486 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15487 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15489 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15490 xops[0] = GEN_INT (vcall_offset);
15491 xops[1] = tmp2;
15492 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15493 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15495 xops[1] = this_reg;
15496 if (TARGET_64BIT)
15497 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15498 else
15499 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15502 /* If necessary, drop THIS back to its stack slot. */
15503 if (this_reg && this_reg != this)
15505 xops[0] = this_reg;
15506 xops[1] = this;
15507 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15510 xops[0] = XEXP (DECL_RTL (function), 0);
15511 if (TARGET_64BIT)
15513 if (!flag_pic || (*targetm.binds_local_p) (function))
15514 output_asm_insn ("jmp\t%P0", xops);
15515 else
15517 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15518 tmp = gen_rtx_CONST (Pmode, tmp);
15519 tmp = gen_rtx_MEM (QImode, tmp);
15520 xops[0] = tmp;
15521 output_asm_insn ("jmp\t%A0", xops);
15524 else
15526 if (!flag_pic || (*targetm.binds_local_p) (function))
15527 output_asm_insn ("jmp\t%P0", xops);
15528 else
15529 #if TARGET_MACHO
15530 if (TARGET_MACHO)
15532 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15533 tmp = (gen_rtx_SYMBOL_REF
15534 (Pmode,
15535 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15536 tmp = gen_rtx_MEM (QImode, tmp);
15537 xops[0] = tmp;
15538 output_asm_insn ("jmp\t%0", xops);
15540 else
15541 #endif /* TARGET_MACHO */
15543 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15544 output_set_got (tmp);
15546 xops[1] = tmp;
15547 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15548 output_asm_insn ("jmp\t{*}%1", xops);
15553 static void
15554 x86_file_start (void)
15556 default_file_start ();
15557 if (X86_FILE_START_VERSION_DIRECTIVE)
15558 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15559 if (X86_FILE_START_FLTUSED)
15560 fputs ("\t.global\t__fltused\n", asm_out_file);
15561 if (ix86_asm_dialect == ASM_INTEL)
15562 fputs ("\t.intel_syntax\n", asm_out_file);
15566 x86_field_alignment (tree field, int computed)
15568 enum machine_mode mode;
15569 tree type = TREE_TYPE (field);
15571 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15572 return computed;
15573 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15574 ? get_inner_array_type (type) : type);
15575 if (mode == DFmode || mode == DCmode
15576 || GET_MODE_CLASS (mode) == MODE_INT
15577 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15578 return MIN (32, computed);
15579 return computed;
15582 /* Output assembler code to FILE to increment profiler label # LABELNO
15583 for profiling a function entry. */
15584 void
15585 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15587 if (TARGET_64BIT)
15588 if (flag_pic)
15590 #ifndef NO_PROFILE_COUNTERS
15591 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
15592 #endif
15593 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15595 else
15597 #ifndef NO_PROFILE_COUNTERS
15598 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15599 #endif
15600 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15602 else if (flag_pic)
15604 #ifndef NO_PROFILE_COUNTERS
15605 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15606 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15607 #endif
15608 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15610 else
15612 #ifndef NO_PROFILE_COUNTERS
15613 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15614 PROFILE_COUNT_REGISTER);
15615 #endif
15616 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15620 /* We don't have exact information about the insn sizes, but we may assume
15621 quite safely that we are informed about all 1 byte insns and memory
15622 address sizes. This is enough to eliminate unnecessary padding in
15623 99% of cases. */
15625 static int
15626 min_insn_size (rtx insn)
15628 int l = 0;
15630 if (!INSN_P (insn) || !active_insn_p (insn))
15631 return 0;
15633 /* Discard alignments we've emitted and jump instructions. */
15634 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15635 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15636 return 0;
15637 if (GET_CODE (insn) == JUMP_INSN
15638 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15639 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15640 return 0;
15642 /* Important case - calls are always 5 bytes.
15643 It is common to have many calls in a row. */
15644 if (GET_CODE (insn) == CALL_INSN
15645 && symbolic_reference_mentioned_p (PATTERN (insn))
15646 && !SIBLING_CALL_P (insn))
15647 return 5;
15648 if (get_attr_length (insn) <= 1)
15649 return 1;
15651 /* For normal instructions we may rely on the sizes of addresses
15652 and the presence of a symbol to require 4 bytes of encoding. This is not
15653 This is not the case for jumps where references are PC relative. */
15654 if (GET_CODE (insn) != JUMP_INSN)
15656 l = get_attr_length_address (insn);
15657 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15658 l = 4;
15660 if (l)
15661 return 1+l;
15662 else
15663 return 2;
15666 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15667 16 byte window. */
15669 static void
15670 ix86_avoid_jump_misspredicts (void)
15672 rtx insn, start = get_insns ();
15673 int nbytes = 0, njumps = 0;
15674 int isjump = 0;
15676 /* Look for all minimal intervals of instructions containing 4 jumps.
15677 The intervals are bounded by START and INSN. NBYTES is the total
15678 size of instructions in the interval including INSN and not including
14679 START. When NBYTES is smaller than 16 bytes, it is possible
14680 that the end of START and INSN end up in the same 16 byte page.
14682 The smallest offset in the page at which INSN can start is the case where
14683 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
14684 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN). */
15686 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15689 nbytes += min_insn_size (insn);
15690 if (dump_file)
15691 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15692 INSN_UID (insn), min_insn_size (insn));
15693 if ((GET_CODE (insn) == JUMP_INSN
15694 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15695 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15696 || GET_CODE (insn) == CALL_INSN)
15697 njumps++;
15698 else
15699 continue;
15701 while (njumps > 3)
15703 start = NEXT_INSN (start);
15704 if ((GET_CODE (start) == JUMP_INSN
15705 && GET_CODE (PATTERN (start)) != ADDR_VEC
15706 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15707 || GET_CODE (start) == CALL_INSN)
15708 njumps--, isjump = 1;
15709 else
15710 isjump = 0;
15711 nbytes -= min_insn_size (start);
15713 if (njumps < 0)
15714 abort ();
15715 if (dump_file)
15716 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15717 INSN_UID (start), INSN_UID (insn), nbytes);
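/* At this point the current interval holds three jumps, is immediately
   preceded by another jump, and is still shorter than 16 bytes, so all four
   could land in the same 16 byte window; pad so that INSN is pushed into
   the next window.  */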
15719 if (njumps == 3 && isjump && nbytes < 16)
15721 int padsize = 15 - nbytes + min_insn_size (insn);
15723 if (dump_file)
15724 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15725 INSN_UID (insn), padsize);
15726 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15731 /* The AMD Athlon works faster
15732 when RET is not the destination of a conditional jump or directly preceded
15733 by another jump instruction. We avoid the penalty by inserting a NOP just
15734 before such RET instructions. */
15735 static void
15736 ix86_pad_returns (void)
15738 edge e;
15740 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15742 basic_block bb = e->src;
15743 rtx ret = BB_END (bb);
15744 rtx prev;
15745 bool replace = false;
15747 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15748 || !maybe_hot_bb_p (bb))
15749 continue;
15750 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15751 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15752 break;
15753 if (prev && GET_CODE (prev) == CODE_LABEL)
15755 edge e;
15756 for (e = bb->pred; e; e = e->pred_next)
15757 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15758 && !(e->flags & EDGE_FALLTHRU))
15759 replace = true;
15761 if (!replace)
15763 prev = prev_active_insn (ret);
15764 if (prev
15765 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15766 || GET_CODE (prev) == CALL_INSN))
15767 replace = true;
15768 /* Empty functions get a branch mispredict even when the jump destination
15769 is not visible to us. */
15770 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15771 replace = true;
15773 if (replace)
15775 emit_insn_before (gen_return_internal_long (), ret);
15776 delete_insn (ret);
15781 /* Implement machine specific optimizations. We implement padding of returns
15782 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
15783 static void
15784 ix86_reorg (void)
15786 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15787 ix86_pad_returns ();
15788 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15789 ix86_avoid_jump_misspredicts ();
15792 /* Return nonzero when a QImode register that must be encoded via a REX prefix
15793 is used. */
15794 bool
15795 x86_extended_QIreg_mentioned_p (rtx insn)
15797 int i;
15798 extract_insn_cached (insn);
15799 for (i = 0; i < recog_data.n_operands; i++)
15800 if (REG_P (recog_data.operand[i])
15801 && REGNO (recog_data.operand[i]) >= 4)
15802 return true;
15803 return false;
15806 /* Return nonzero when P points to a register encoded via a REX prefix.
15807 Called via for_each_rtx. */
15808 static int
15809 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15811 unsigned int regno;
15812 if (!REG_P (*p))
15813 return 0;
15814 regno = REGNO (*p);
15815 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15818 /* Return true when INSN mentions register that must be encoded using REX
15819 prefix. */
15820 bool
15821 x86_extended_reg_mentioned_p (rtx insn)
15823 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15826 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15827 optabs would emit if we didn't have TFmode patterns. */
15829 void
15830 x86_emit_floatuns (rtx operands[2])
15832 rtx neglab, donelab, i0, i1, f0, in, out;
15833 enum machine_mode mode, inmode;
15835 inmode = GET_MODE (operands[1]);
15836 if (inmode != SImode
15837 && inmode != DImode)
15838 abort ();
15840 out = operands[0];
15841 in = force_reg (inmode, operands[1]);
15842 mode = GET_MODE (out);
15843 neglab = gen_label_rtx ();
15844 donelab = gen_label_rtx ();
15845 i1 = gen_reg_rtx (Pmode);
15846 f0 = gen_reg_rtx (mode);
15848 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15850 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15851 emit_jump_insn (gen_jump (donelab));
15852 emit_barrier ();
15854 emit_label (neglab);
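/* The input has its sign bit set, so a straight signed conversion would be
   wrong.  Halve it with a logical shift, OR the discarded low bit back in
   so the final rounding is unaffected, convert the halved value and then
   double the result.  */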
15856 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15857 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15858 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15859 expand_float (f0, i0, 0);
15860 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15862 emit_label (donelab);
15865 /* Initialize vector TARGET via VALS. */
15866 void
15867 ix86_expand_vector_init (rtx target, rtx vals)
15869 enum machine_mode mode = GET_MODE (target);
15870 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15871 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15872 int i;
15874 for (i = n_elts - 1; i >= 0; i--)
15875 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15876 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15877 break;
15879 /* A few special cases first...
15880 ... constants are best loaded from the constant pool. */
15881 if (i < 0)
15883 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15884 return;
15887 /* ... values where only the first field is non-constant are best loaded
15888 from the pool and overwritten via a move later. */
15889 if (!i)
15891 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15892 GET_MODE_INNER (mode), 0);
15894 op = force_reg (mode, op);
15895 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15896 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15897 switch (GET_MODE (target))
15899 case V2DFmode:
15900 emit_insn (gen_sse2_movsd (target, target, op));
15901 break;
15902 case V4SFmode:
15903 emit_insn (gen_sse_movss (target, target, op));
15904 break;
15905 default:
15906 break;
15908 return;
15911 /* And the general case, built up with interleave (unpack) instructions. */
15912 switch (GET_MODE (target))
15914 case V2DFmode:
15916 rtx vecop0 =
15917 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15918 rtx vecop1 =
15919 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15921 vecop0 = force_reg (V2DFmode, vecop0);
15922 vecop1 = force_reg (V2DFmode, vecop1);
15923 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15925 break;
15926 case V4SFmode:
15928 rtx vecop0 =
15929 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15930 rtx vecop1 =
15931 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15932 rtx vecop2 =
15933 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15934 rtx vecop3 =
15935 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15936 rtx tmp1 = gen_reg_rtx (V4SFmode);
15937 rtx tmp2 = gen_reg_rtx (V4SFmode);
15939 vecop0 = force_reg (V4SFmode, vecop0);
15940 vecop1 = force_reg (V4SFmode, vecop1);
15941 vecop2 = force_reg (V4SFmode, vecop2);
15942 vecop3 = force_reg (V4SFmode, vecop3);
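/* unpcklps interleaves the low elements of its operands, so tmp1 becomes
   { v1, v3, ... }, tmp2 becomes { v0, v2, ... }, and interleaving tmp2
   with tmp1 yields { v0, v1, v2, v3 }.  */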
15943 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15944 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15945 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15947 break;
15948 default:
15949 abort ();
15953 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15955 We do this in the new i386 backend to maintain source compatibility
15956 with the old cc0-based compiler. */
15958 static tree
15959 ix86_md_asm_clobbers (tree clobbers)
15961 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15962 clobbers);
15963 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15964 clobbers);
15965 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15966 clobbers);
15967 return clobbers;
15970 /* Worker function for REVERSE_CONDITION. */
15972 enum rtx_code
15973 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15975 return (mode != CCFPmode && mode != CCFPUmode
15976 ? reverse_condition (code)
15977 : reverse_condition_maybe_unordered (code));
15980 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15981 to OPERANDS[0]. */
15983 const char *
15984 output_387_reg_move (rtx insn, rtx *operands)
15986 if (REG_P (operands[1])
15987 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15989 if (REGNO (operands[0]) == FIRST_STACK_REG
15990 && TARGET_USE_FFREEP)
15991 return "ffreep\t%y0";
15992 return "fstp\t%y0";
15994 if (STACK_TOP_P (operands[0]))
15995 return "fld%z1\t%y1";
15996 return "fst\t%y0";
15999 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
16000 the FP status register is set. */
16002 void
16003 ix86_emit_fp_unordered_jump (rtx label)
16005 rtx reg = gen_reg_rtx (HImode);
16006 rtx temp;
16008 emit_insn (gen_x86_fnstsw_1 (reg));
16010 if (TARGET_USE_SAHF)
16012 emit_insn (gen_x86_sahf_1 (reg));
16014 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16015 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16017 else
16019 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16021 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16022 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16025 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16026 gen_rtx_LABEL_REF (VOIDmode, label),
16027 pc_rtx);
16028 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16029 emit_jump_insn (temp);
16032 /* Output code to perform a log1p XFmode calculation. */
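/* fyl2xp1 computes Y * log2(X + 1) but is only defined for
   |X| < 1 - sqrt(2)/2 (about 0.29289, the constant compared against below);
   in that range it is used directly, otherwise we fall back to fyl2x on
   1 + X.  Y is loaded with ln(2) (fldln2) in both paths, turning the
   base-2 logarithm into a natural logarithm.  */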
16034 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16036 rtx label1 = gen_label_rtx ();
16037 rtx label2 = gen_label_rtx ();
16039 rtx tmp = gen_reg_rtx (XFmode);
16040 rtx tmp2 = gen_reg_rtx (XFmode);
16042 emit_insn (gen_absxf2 (tmp, op1));
16043 emit_insn (gen_cmpxf (tmp,
16044 CONST_DOUBLE_FROM_REAL_VALUE (
16045 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16046 XFmode)));
16047 emit_jump_insn (gen_bge (label1));
16049 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16050 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16051 emit_jump (label2);
16053 emit_label (label1);
16054 emit_move_insn (tmp, CONST1_RTX (XFmode));
16055 emit_insn (gen_addxf3 (tmp, op1, tmp));
16056 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16057 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16059 emit_label (label2);
16062 #include "gt-i386.h"