* config/i386/i386.c (classify_argument): Treat V1xx modes the same as
[official-gcc.git] / gcc / config / i386 / i386.c
bloba9ebf210910dea866fc9feb281988432d888acb1
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
784 /* TLS dialext. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
839 int, int, FILE *);
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
845 rtx *);
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
848 enum machine_mode);
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
883 struct ix86_address
885 rtx base, index, disp;
886 HOST_WIDE_INT scale;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
890 static int ix86_decompose_address (rtx, struct ix86_address *);
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 tree, rtx);
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 tree, rtx);
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
932 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
933 static void ix86_svr3_asm_out_constructor (rtx, int);
934 #endif
936 /* Register class used for passing given 64bit part of the argument.
937 These represent classes as documented by the PS ABI, with the exception
938 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
939 use SF or DFmode move instead of DImode to avoid reformatting penalties.
941 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
942 whenever possible (upper half does contain padding).
944 enum x86_64_reg_class
946 X86_64_NO_CLASS,
947 X86_64_INTEGER_CLASS,
948 X86_64_INTEGERSI_CLASS,
949 X86_64_SSE_CLASS,
950 X86_64_SSESF_CLASS,
951 X86_64_SSEDF_CLASS,
952 X86_64_SSEUP_CLASS,
953 X86_64_X87_CLASS,
954 X86_64_X87UP_CLASS,
955 X86_64_MEMORY_CLASS
957 static const char * const x86_64_reg_class_name[] =
958 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
960 #define MAX_CLASSES 4
961 static int classify_argument (enum machine_mode, tree,
962 enum x86_64_reg_class [MAX_CLASSES], int);
963 static int examine_argument (enum machine_mode, tree, int, int *, int *);
964 static rtx construct_container (enum machine_mode, tree, int, int, int,
965 const int *, int);
966 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
967 enum x86_64_reg_class);
969 /* Table of constants used by fldpi, fldln2, etc.... */
970 static REAL_VALUE_TYPE ext_80387_constants_table [5];
971 static bool ext_80387_constants_init = 0;
972 static void init_ext_80387_constants (void);
974 /* Initialize the GCC target structure. */
975 #undef TARGET_ATTRIBUTE_TABLE
976 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
977 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
978 # undef TARGET_MERGE_DECL_ATTRIBUTES
979 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
980 #endif
982 #undef TARGET_COMP_TYPE_ATTRIBUTES
983 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
985 #undef TARGET_INIT_BUILTINS
986 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1003 #ifdef ASM_QUAD
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1006 #endif
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1020 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
1021 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1022 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1023 ia32_multipass_dfa_lookahead
1025 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1026 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1028 #ifdef HAVE_AS_TLS
1029 #undef TARGET_HAVE_TLS
1030 #define TARGET_HAVE_TLS true
1031 #endif
1032 #undef TARGET_CANNOT_FORCE_CONST_MEM
1033 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1035 #undef TARGET_DELEGITIMIZE_ADDRESS
1036 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1038 #undef TARGET_MS_BITFIELD_LAYOUT_P
1039 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1041 #undef TARGET_ASM_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1046 #undef TARGET_ASM_FILE_START
1047 #define TARGET_ASM_FILE_START x86_file_start
1049 #undef TARGET_RTX_COSTS
1050 #define TARGET_RTX_COSTS ix86_rtx_costs
1051 #undef TARGET_ADDRESS_COST
1052 #define TARGET_ADDRESS_COST ix86_address_cost
1054 #undef TARGET_FIXED_CONDITION_CODE_REGS
1055 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1056 #undef TARGET_CC_MODES_COMPATIBLE
1057 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1059 #undef TARGET_MACHINE_DEPENDENT_REORG
1060 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1062 #undef TARGET_BUILD_BUILTIN_VA_LIST
1063 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1065 #undef TARGET_MD_ASM_CLOBBERS
1066 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1068 #undef TARGET_PROMOTE_PROTOTYPES
1069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1070 #undef TARGET_STRUCT_VALUE_RTX
1071 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1072 #undef TARGET_SETUP_INCOMING_VARARGS
1073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1078 struct gcc_target targetm = TARGET_INITIALIZER;
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1082 in memory. */
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1085 #endif
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1091 been parsed.
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1096 void
1097 override_options (void)
1099 int i;
1100 /* Comes from final.c -- no real reason to change it. */
1101 #define MAX_CODE_ALIGN 16
1103 static struct ptt
1105 const struct processor_costs *cost; /* Processor costs */
1106 const int target_enable; /* Target flags to enable. */
1107 const int target_disable; /* Target flags to disable. */
1108 const int align_loop; /* Default alignments. */
1109 const int align_loop_max_skip;
1110 const int align_jump;
1111 const int align_jump_max_skip;
1112 const int align_func;
1114 const processor_target_table[PROCESSOR_max] =
1116 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1117 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1118 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1120 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1121 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1122 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1123 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1127 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1128 static struct pta
1130 const char *const name; /* processor name or nickname. */
1131 const enum processor_type processor;
1132 const enum pta_flags
1134 PTA_SSE = 1,
1135 PTA_SSE2 = 2,
1136 PTA_SSE3 = 4,
1137 PTA_MMX = 8,
1138 PTA_PREFETCH_SSE = 16,
1139 PTA_3DNOW = 32,
1140 PTA_3DNOW_A = 64,
1141 PTA_64BIT = 128
1142 } flags;
1144 const processor_alias_table[] =
1146 {"i386", PROCESSOR_I386, 0},
1147 {"i486", PROCESSOR_I486, 0},
1148 {"i586", PROCESSOR_PENTIUM, 0},
1149 {"pentium", PROCESSOR_PENTIUM, 0},
1150 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1151 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1152 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1153 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1155 {"i686", PROCESSOR_PENTIUMPRO, 0},
1156 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1158 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1159 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1161 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1162 | PTA_MMX | PTA_PREFETCH_SSE},
1163 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"k6", PROCESSOR_K6, PTA_MMX},
1170 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1171 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1173 | PTA_3DNOW_A},
1174 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1175 | PTA_3DNOW | PTA_3DNOW_A},
1176 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1177 | PTA_3DNOW_A | PTA_SSE},
1178 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1183 | PTA_SSE | PTA_SSE2 },
1184 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1186 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 int const pta_size = ARRAY_SIZE (processor_alias_table);
1196 /* Set the default values for switches whose default depends on TARGET_64BIT
1197 in case they weren't overwritten by command line options. */
1198 if (TARGET_64BIT)
1200 if (flag_omit_frame_pointer == 2)
1201 flag_omit_frame_pointer = 1;
1202 if (flag_asynchronous_unwind_tables == 2)
1203 flag_asynchronous_unwind_tables = 1;
1204 if (flag_pcc_struct_return == 2)
1205 flag_pcc_struct_return = 0;
1207 else
1209 if (flag_omit_frame_pointer == 2)
1210 flag_omit_frame_pointer = 0;
1211 if (flag_asynchronous_unwind_tables == 2)
1212 flag_asynchronous_unwind_tables = 0;
1213 if (flag_pcc_struct_return == 2)
1214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1217 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1218 SUBTARGET_OVERRIDE_OPTIONS;
1219 #endif
1221 if (!ix86_tune_string && ix86_arch_string)
1222 ix86_tune_string = ix86_arch_string;
1223 if (!ix86_tune_string)
1224 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1225 if (!ix86_arch_string)
1226 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1228 if (ix86_cmodel_string != 0)
1230 if (!strcmp (ix86_cmodel_string, "small"))
1231 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1232 else if (flag_pic)
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1234 else if (!strcmp (ix86_cmodel_string, "32"))
1235 ix86_cmodel = CM_32;
1236 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1237 ix86_cmodel = CM_KERNEL;
1238 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1239 ix86_cmodel = CM_MEDIUM;
1240 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1241 ix86_cmodel = CM_LARGE;
1242 else
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1245 else
1247 ix86_cmodel = CM_32;
1248 if (TARGET_64BIT)
1249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1251 if (ix86_asm_string != 0)
1253 if (!strcmp (ix86_asm_string, "intel"))
1254 ix86_asm_dialect = ASM_INTEL;
1255 else if (!strcmp (ix86_asm_string, "att"))
1256 ix86_asm_dialect = ASM_ATT;
1257 else
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1260 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1263 if (ix86_cmodel == CM_LARGE)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags & MASK_64BIT) ? 64 : 32);
1269 for (i = 0; i < pta_size; i++)
1270 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1272 ix86_arch = processor_alias_table[i].processor;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune = ix86_arch;
1275 if (processor_alias_table[i].flags & PTA_MMX
1276 && !(target_flags_explicit & MASK_MMX))
1277 target_flags |= MASK_MMX;
1278 if (processor_alias_table[i].flags & PTA_3DNOW
1279 && !(target_flags_explicit & MASK_3DNOW))
1280 target_flags |= MASK_3DNOW;
1281 if (processor_alias_table[i].flags & PTA_3DNOW_A
1282 && !(target_flags_explicit & MASK_3DNOW_A))
1283 target_flags |= MASK_3DNOW_A;
1284 if (processor_alias_table[i].flags & PTA_SSE
1285 && !(target_flags_explicit & MASK_SSE))
1286 target_flags |= MASK_SSE;
1287 if (processor_alias_table[i].flags & PTA_SSE2
1288 && !(target_flags_explicit & MASK_SSE2))
1289 target_flags |= MASK_SSE2;
1290 if (processor_alias_table[i].flags & PTA_SSE3
1291 && !(target_flags_explicit & MASK_SSE3))
1292 target_flags |= MASK_SSE3;
1293 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1294 x86_prefetch_sse = true;
1295 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1296 error ("CPU you selected does not support x86-64 instruction set");
1297 break;
1300 if (i == pta_size)
1301 error ("bad value (%s) for -march= switch", ix86_arch_string);
1303 for (i = 0; i < pta_size; i++)
1304 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1306 ix86_tune = processor_alias_table[i].processor;
1307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1308 error ("CPU you selected does not support x86-64 instruction set");
1310 /* Intel CPUs have always interpreted SSE prefetch instructions as
1311 NOPs; so, we can enable SSE prefetch instructions even when
1312 -mtune (rather than -march) points us to a processor that has them.
1313 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1314 higher processors. */
1315 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1316 x86_prefetch_sse = true;
1317 break;
1319 if (i == pta_size)
1320 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1322 if (optimize_size)
1323 ix86_cost = &size_cost;
1324 else
1325 ix86_cost = processor_target_table[ix86_tune].cost;
1326 target_flags |= processor_target_table[ix86_tune].target_enable;
1327 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1329 /* Arrange to set up i386_stack_locals for all functions. */
1330 init_machine_status = ix86_init_machine_status;
1332 /* Validate -mregparm= value. */
1333 if (ix86_regparm_string)
1335 i = atoi (ix86_regparm_string);
1336 if (i < 0 || i > REGPARM_MAX)
1337 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1338 else
1339 ix86_regparm = i;
1341 else
1342 if (TARGET_64BIT)
1343 ix86_regparm = REGPARM_MAX;
1345 /* If the user has provided any of the -malign-* options,
1346 warn and use that value only if -falign-* is not set.
1347 Remove this code in GCC 3.2 or later. */
1348 if (ix86_align_loops_string)
1350 warning ("-malign-loops is obsolete, use -falign-loops");
1351 if (align_loops == 0)
1353 i = atoi (ix86_align_loops_string);
1354 if (i < 0 || i > MAX_CODE_ALIGN)
1355 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1356 else
1357 align_loops = 1 << i;
1361 if (ix86_align_jumps_string)
1363 warning ("-malign-jumps is obsolete, use -falign-jumps");
1364 if (align_jumps == 0)
1366 i = atoi (ix86_align_jumps_string);
1367 if (i < 0 || i > MAX_CODE_ALIGN)
1368 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1369 else
1370 align_jumps = 1 << i;
1374 if (ix86_align_funcs_string)
1376 warning ("-malign-functions is obsolete, use -falign-functions");
1377 if (align_functions == 0)
1379 i = atoi (ix86_align_funcs_string);
1380 if (i < 0 || i > MAX_CODE_ALIGN)
1381 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1382 else
1383 align_functions = 1 << i;
1387 /* Default align_* from the processor table. */
1388 if (align_loops == 0)
1390 align_loops = processor_target_table[ix86_tune].align_loop;
1391 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1393 if (align_jumps == 0)
1395 align_jumps = processor_target_table[ix86_tune].align_jump;
1396 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1398 if (align_functions == 0)
1400 align_functions = processor_target_table[ix86_tune].align_func;
1403 /* Validate -mpreferred-stack-boundary= value, or provide default.
1404 The default of 128 bits is for Pentium III's SSE __m128, but we
1405 don't want additional code to keep the stack aligned when
1406 optimizing for code size. */
1407 ix86_preferred_stack_boundary = (optimize_size
1408 ? TARGET_64BIT ? 128 : 32
1409 : 128);
1410 if (ix86_preferred_stack_boundary_string)
1412 i = atoi (ix86_preferred_stack_boundary_string);
1413 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1414 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1415 TARGET_64BIT ? 4 : 2);
1416 else
1417 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1420 /* Validate -mbranch-cost= value, or provide default. */
1421 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1422 if (ix86_branch_cost_string)
1424 i = atoi (ix86_branch_cost_string);
1425 if (i < 0 || i > 5)
1426 error ("-mbranch-cost=%d is not between 0 and 5", i);
1427 else
1428 ix86_branch_cost = i;
1431 if (ix86_tls_dialect_string)
1433 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1434 ix86_tls_dialect = TLS_DIALECT_GNU;
1435 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1436 ix86_tls_dialect = TLS_DIALECT_SUN;
1437 else
1438 error ("bad value (%s) for -mtls-dialect= switch",
1439 ix86_tls_dialect_string);
1442 /* Keep nonleaf frame pointers. */
1443 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1444 flag_omit_frame_pointer = 1;
1446 /* If we're doing fast math, we don't care about comparison order
1447 wrt NaNs. This lets us use a shorter comparison sequence. */
1448 if (flag_unsafe_math_optimizations)
1449 target_flags &= ~MASK_IEEE_FP;
1451 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1452 since the insns won't need emulation. */
1453 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1454 target_flags &= ~MASK_NO_FANCY_MATH_387;
1456 /* Turn on SSE2 builtins for -msse3. */
1457 if (TARGET_SSE3)
1458 target_flags |= MASK_SSE2;
1460 /* Turn on SSE builtins for -msse2. */
1461 if (TARGET_SSE2)
1462 target_flags |= MASK_SSE;
1464 if (TARGET_64BIT)
1466 if (TARGET_ALIGN_DOUBLE)
1467 error ("-malign-double makes no sense in the 64bit mode");
1468 if (TARGET_RTD)
1469 error ("-mrtd calling convention not supported in the 64bit mode");
1470 /* Enable by default the SSE and MMX builtins. */
1471 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1472 ix86_fpmath = FPMATH_SSE;
1474 else
1476 ix86_fpmath = FPMATH_387;
1477 /* i386 ABI does not specify red zone. It still makes sense to use it
1478 when programmer takes care to stack from being destroyed. */
1479 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1480 target_flags |= MASK_NO_RED_ZONE;
1483 if (ix86_fpmath_string != 0)
1485 if (! strcmp (ix86_fpmath_string, "387"))
1486 ix86_fpmath = FPMATH_387;
1487 else if (! strcmp (ix86_fpmath_string, "sse"))
1489 if (!TARGET_SSE)
1491 warning ("SSE instruction set disabled, using 387 arithmetics");
1492 ix86_fpmath = FPMATH_387;
1494 else
1495 ix86_fpmath = FPMATH_SSE;
1497 else if (! strcmp (ix86_fpmath_string, "387,sse")
1498 || ! strcmp (ix86_fpmath_string, "sse,387"))
1500 if (!TARGET_SSE)
1502 warning ("SSE instruction set disabled, using 387 arithmetics");
1503 ix86_fpmath = FPMATH_387;
1505 else if (!TARGET_80387)
1507 warning ("387 instruction set disabled, using SSE arithmetics");
1508 ix86_fpmath = FPMATH_SSE;
1510 else
1511 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1513 else
1514 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1517 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1518 on by -msse. */
1519 if (TARGET_SSE)
1521 target_flags |= MASK_MMX;
1522 x86_prefetch_sse = true;
1525 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1526 if (TARGET_3DNOW)
1528 target_flags |= MASK_MMX;
1529 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1530 extensions it adds. */
1531 if (x86_3dnow_a & (1 << ix86_arch))
1532 target_flags |= MASK_3DNOW_A;
1534 if ((x86_accumulate_outgoing_args & TUNEMASK)
1535 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1536 && !optimize_size)
1537 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1539 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1541 char *p;
1542 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1543 p = strchr (internal_label_prefix, 'X');
1544 internal_label_prefix_len = p - internal_label_prefix;
1545 *p = '\0';
1549 void
1550 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1552 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1553 make the problem with not enough registers even worse. */
1554 #ifdef INSN_SCHEDULING
1555 if (level > 1)
1556 flag_schedule_insns = 0;
1557 #endif
1559 /* The default values of these switches depend on the TARGET_64BIT
1560 that is not known at this moment. Mark these values with 2 and
1561 let user the to override these. In case there is no command line option
1562 specifying them, we will set the defaults in override_options. */
1563 if (optimize >= 1)
1564 flag_omit_frame_pointer = 2;
1565 flag_pcc_struct_return = 2;
1566 flag_asynchronous_unwind_tables = 2;
1569 /* Table of valid machine attributes. */
1570 const struct attribute_spec ix86_attribute_table[] =
1572 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1573 /* Stdcall attribute says callee is responsible for popping arguments
1574 if they are not variable. */
1575 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1576 /* Fastcall attribute says callee is responsible for popping arguments
1577 if they are not variable. */
1578 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1579 /* Cdecl attribute says the callee is a normal C declaration */
1580 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1581 /* Regparm attribute specifies how many integer arguments are to be
1582 passed in registers. */
1583 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1584 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1585 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1586 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1587 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1588 #endif
1589 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1590 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1591 { NULL, 0, 0, false, false, false, NULL }
1594 /* Decide whether we can make a sibling call to a function. DECL is the
1595 declaration of the function being targeted by the call and EXP is the
1596 CALL_EXPR representing the call. */
1598 static bool
1599 ix86_function_ok_for_sibcall (tree decl, tree exp)
1601 /* If we are generating position-independent code, we cannot sibcall
1602 optimize any indirect call, or a direct call to a global function,
1603 as the PLT requires %ebx be live. */
1604 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1605 return false;
1607 /* If we are returning floats on the 80387 register stack, we cannot
1608 make a sibcall from a function that doesn't return a float to a
1609 function that does or, conversely, from a function that does return
1610 a float to a function that doesn't; the necessary stack adjustment
1611 would not be executed. */
1612 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1613 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1614 return false;
1616 /* If this call is indirect, we'll need to be able to use a call-clobbered
1617 register for the address of the target function. Make sure that all
1618 such registers are not used for passing parameters. */
1619 if (!decl && !TARGET_64BIT)
1621 tree type;
1623 /* We're looking at the CALL_EXPR, we need the type of the function. */
1624 type = TREE_OPERAND (exp, 0); /* pointer expression */
1625 type = TREE_TYPE (type); /* pointer type */
1626 type = TREE_TYPE (type); /* function type */
1628 if (ix86_function_regparm (type, NULL) >= 3)
1630 /* ??? Need to count the actual number of registers to be used,
1631 not the possible number of registers. Fix later. */
1632 return false;
1636 /* Otherwise okay. That also includes certain types of indirect calls. */
1637 return true;
1640 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1641 arguments as in struct attribute_spec.handler. */
1642 static tree
1643 ix86_handle_cdecl_attribute (tree *node, tree name,
1644 tree args ATTRIBUTE_UNUSED,
1645 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1647 if (TREE_CODE (*node) != FUNCTION_TYPE
1648 && TREE_CODE (*node) != METHOD_TYPE
1649 && TREE_CODE (*node) != FIELD_DECL
1650 && TREE_CODE (*node) != TYPE_DECL)
1652 warning ("`%s' attribute only applies to functions",
1653 IDENTIFIER_POINTER (name));
1654 *no_add_attrs = true;
1656 else
1658 if (is_attribute_p ("fastcall", name))
1660 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1662 error ("fastcall and stdcall attributes are not compatible");
1664 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1666 error ("fastcall and regparm attributes are not compatible");
1669 else if (is_attribute_p ("stdcall", name))
1671 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1673 error ("fastcall and stdcall attributes are not compatible");
1678 if (TARGET_64BIT)
1680 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1681 *no_add_attrs = true;
1684 return NULL_TREE;
1687 /* Handle a "regparm" attribute;
1688 arguments as in struct attribute_spec.handler. */
1689 static tree
1690 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1691 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1693 if (TREE_CODE (*node) != FUNCTION_TYPE
1694 && TREE_CODE (*node) != METHOD_TYPE
1695 && TREE_CODE (*node) != FIELD_DECL
1696 && TREE_CODE (*node) != TYPE_DECL)
1698 warning ("`%s' attribute only applies to functions",
1699 IDENTIFIER_POINTER (name));
1700 *no_add_attrs = true;
1702 else
1704 tree cst;
1706 cst = TREE_VALUE (args);
1707 if (TREE_CODE (cst) != INTEGER_CST)
1709 warning ("`%s' attribute requires an integer constant argument",
1710 IDENTIFIER_POINTER (name));
1711 *no_add_attrs = true;
1713 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1715 warning ("argument to `%s' attribute larger than %d",
1716 IDENTIFIER_POINTER (name), REGPARM_MAX);
1717 *no_add_attrs = true;
1720 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1722 error ("fastcall and regparm attributes are not compatible");
1726 return NULL_TREE;
1729 /* Return 0 if the attributes for two types are incompatible, 1 if they
1730 are compatible, and 2 if they are nearly compatible (which causes a
1731 warning to be generated). */
1733 static int
1734 ix86_comp_type_attributes (tree type1, tree type2)
1736 /* Check for mismatch of non-default calling convention. */
1737 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1739 if (TREE_CODE (type1) != FUNCTION_TYPE)
1740 return 1;
1742 /* Check for mismatched fastcall types */
1743 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1744 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1745 return 0;
1747 /* Check for mismatched return types (cdecl vs stdcall). */
1748 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1749 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1750 return 0;
1751 if (ix86_function_regparm (type1, NULL)
1752 != ix86_function_regparm (type2, NULL))
1753 return 0;
1754 return 1;
1757 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1758 DECL may be NULL when calling function indirectly
1759 or considering a libcall. */
1761 static int
1762 ix86_function_regparm (tree type, tree decl)
1764 tree attr;
1765 int regparm = ix86_regparm;
1766 bool user_convention = false;
1768 if (!TARGET_64BIT)
1770 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1771 if (attr)
1773 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1774 user_convention = true;
1777 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1779 regparm = 2;
1780 user_convention = true;
1783 /* Use register calling convention for local functions when possible. */
1784 if (!TARGET_64BIT && !user_convention && decl
1785 && flag_unit_at_a_time && !profile_flag)
1787 struct cgraph_local_info *i = cgraph_local_info (decl);
1788 if (i && i->local)
1790 /* We can't use regparm(3) for nested functions as these use
1791 static chain pointer in third argument. */
1792 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1793 regparm = 2;
1794 else
1795 regparm = 3;
1799 return regparm;
1802 /* Return true if EAX is live at the start of the function. Used by
1803 ix86_expand_prologue to determine if we need special help before
1804 calling allocate_stack_worker. */
1806 static bool
1807 ix86_eax_live_at_start_p (void)
1809 /* Cheat. Don't bother working forward from ix86_function_regparm
1810 to the function type to whether an actual argument is located in
1811 eax. Instead just look at cfg info, which is still close enough
1812 to correct at this point. This gives false positives for broken
1813 functions that might use uninitialized data that happens to be
1814 allocated in eax, but who cares? */
1815 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1818 /* Value is the number of bytes of arguments automatically
1819 popped when returning from a subroutine call.
1820 FUNDECL is the declaration node of the function (as a tree),
1821 FUNTYPE is the data type of the function (as a tree),
1822 or for a library call it is an identifier node for the subroutine name.
1823 SIZE is the number of bytes of arguments passed on the stack.
1825 On the 80386, the RTD insn may be used to pop them if the number
1826 of args is fixed, but if the number is variable then the caller
1827 must pop them all. RTD can't be used for library calls now
1828 because the library is compiled with the Unix compiler.
1829 Use of RTD is a selectable option, since it is incompatible with
1830 standard Unix calling sequences. If the option is not selected,
1831 the caller must always pop the args.
1833 The attribute stdcall is equivalent to RTD on a per module basis. */
1836 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1838 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1840 /* Cdecl functions override -mrtd, and never pop the stack. */
1841 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1843 /* Stdcall and fastcall functions will pop the stack if not
1844 variable args. */
1845 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1846 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1847 rtd = 1;
1849 if (rtd
1850 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1851 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1852 == void_type_node)))
1853 return size;
1856 /* Lose any fake structure return argument if it is passed on the stack. */
1857 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1858 && !TARGET_64BIT)
1860 int nregs = ix86_function_regparm (funtype, fundecl);
1862 if (!nregs)
1863 return GET_MODE_SIZE (Pmode);
1866 return 0;
1869 /* Argument support functions. */
1871 /* Return true when register may be used to pass function parameters. */
1872 bool
1873 ix86_function_arg_regno_p (int regno)
1875 int i;
1876 if (!TARGET_64BIT)
1877 return (regno < REGPARM_MAX
1878 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1879 if (SSE_REGNO_P (regno) && TARGET_SSE)
1880 return true;
1881 /* RAX is used as hidden argument to va_arg functions. */
1882 if (!regno)
1883 return true;
1884 for (i = 0; i < REGPARM_MAX; i++)
1885 if (regno == x86_64_int_parameter_registers[i])
1886 return true;
1887 return false;
1890 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1891 for a call to a function whose data type is FNTYPE.
1892 For a library call, FNTYPE is 0. */
1894 void
1895 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1896 tree fntype, /* tree ptr for function decl */
1897 rtx libname, /* SYMBOL_REF of library name or 0 */
1898 tree fndecl)
1900 static CUMULATIVE_ARGS zero_cum;
1901 tree param, next_param;
1903 if (TARGET_DEBUG_ARG)
1905 fprintf (stderr, "\ninit_cumulative_args (");
1906 if (fntype)
1907 fprintf (stderr, "fntype code = %s, ret code = %s",
1908 tree_code_name[(int) TREE_CODE (fntype)],
1909 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1910 else
1911 fprintf (stderr, "no fntype");
1913 if (libname)
1914 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1917 *cum = zero_cum;
1919 /* Set up the number of registers to use for passing arguments. */
1920 if (fntype)
1921 cum->nregs = ix86_function_regparm (fntype, fndecl);
1922 else
1923 cum->nregs = ix86_regparm;
1924 cum->sse_nregs = SSE_REGPARM_MAX;
1925 cum->mmx_nregs = MMX_REGPARM_MAX;
1926 cum->warn_sse = true;
1927 cum->warn_mmx = true;
1928 cum->maybe_vaarg = false;
1930 /* Use ecx and edx registers if function has fastcall attribute */
1931 if (fntype && !TARGET_64BIT)
1933 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1935 cum->nregs = 2;
1936 cum->fastcall = 1;
1941 /* Determine if this function has variable arguments. This is
1942 indicated by the last argument being 'void_type_mode' if there
1943 are no variable arguments. If there are variable arguments, then
1944 we won't pass anything in registers */
1946 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1948 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1949 param != 0; param = next_param)
1951 next_param = TREE_CHAIN (param);
1952 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1954 if (!TARGET_64BIT)
1956 cum->nregs = 0;
1957 cum->sse_nregs = 0;
1958 cum->mmx_nregs = 0;
1959 cum->warn_sse = 0;
1960 cum->warn_mmx = 0;
1961 cum->fastcall = 0;
1963 cum->maybe_vaarg = true;
1967 if ((!fntype && !libname)
1968 || (fntype && !TYPE_ARG_TYPES (fntype)))
1969 cum->maybe_vaarg = 1;
1971 if (TARGET_DEBUG_ARG)
1972 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1974 return;
1977 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1978 of this code is to classify each 8bytes of incoming argument by the register
1979 class and assign registers accordingly. */
1981 /* Return the union class of CLASS1 and CLASS2.
1982 See the x86-64 PS ABI for details. */
1984 static enum x86_64_reg_class
1985 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1987 /* Rule #1: If both classes are equal, this is the resulting class. */
1988 if (class1 == class2)
1989 return class1;
1991 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1992 the other class. */
1993 if (class1 == X86_64_NO_CLASS)
1994 return class2;
1995 if (class2 == X86_64_NO_CLASS)
1996 return class1;
1998 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1999 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2000 return X86_64_MEMORY_CLASS;
2002 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2003 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2004 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2005 return X86_64_INTEGERSI_CLASS;
2006 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2007 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2008 return X86_64_INTEGER_CLASS;
2010 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2011 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2012 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2013 return X86_64_MEMORY_CLASS;
2015 /* Rule #6: Otherwise class SSE is used. */
2016 return X86_64_SSE_CLASS;
2019 /* Classify the argument of type TYPE and mode MODE.
2020 CLASSES will be filled by the register class used to pass each word
2021 of the operand. The number of words is returned. In case the parameter
2022 should be passed in memory, 0 is returned. As a special case for zero
2023 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2025 BIT_OFFSET is used internally for handling records and specifies offset
2026 of the offset in bits modulo 256 to avoid overflow cases.
2028 See the x86-64 PS ABI for details.
2031 static int
2032 classify_argument (enum machine_mode mode, tree type,
2033 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2035 HOST_WIDE_INT bytes =
2036 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2037 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2039 /* Variable sized entities are always passed/returned in memory. */
2040 if (bytes < 0)
2041 return 0;
2043 if (mode != VOIDmode
2044 && MUST_PASS_IN_STACK (mode, type))
2045 return 0;
2047 if (type && AGGREGATE_TYPE_P (type))
2049 int i;
2050 tree field;
2051 enum x86_64_reg_class subclasses[MAX_CLASSES];
2053 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2054 if (bytes > 16)
2055 return 0;
2057 for (i = 0; i < words; i++)
2058 classes[i] = X86_64_NO_CLASS;
2060 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2061 signalize memory class, so handle it as special case. */
2062 if (!words)
2064 classes[0] = X86_64_NO_CLASS;
2065 return 1;
2068 /* Classify each field of record and merge classes. */
2069 if (TREE_CODE (type) == RECORD_TYPE)
2071 /* For classes first merge in the field of the subclasses. */
2072 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2074 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2075 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2076 int i;
2078 for (i = 0; i < n_bases; ++i)
2080 tree binfo = TREE_VEC_ELT (bases, i);
2081 int num;
2082 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2083 tree type = BINFO_TYPE (binfo);
2085 num = classify_argument (TYPE_MODE (type),
2086 type, subclasses,
2087 (offset + bit_offset) % 256);
2088 if (!num)
2089 return 0;
2090 for (i = 0; i < num; i++)
2092 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2093 classes[i + pos] =
2094 merge_classes (subclasses[i], classes[i + pos]);
2098 /* And now merge the fields of structure. */
2099 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2101 if (TREE_CODE (field) == FIELD_DECL)
2103 int num;
2105 /* Bitfields are always classified as integer. Handle them
2106 early, since later code would consider them to be
2107 misaligned integers. */
2108 if (DECL_BIT_FIELD (field))
2110 for (i = int_bit_position (field) / 8 / 8;
2111 i < (int_bit_position (field)
2112 + tree_low_cst (DECL_SIZE (field), 0)
2113 + 63) / 8 / 8; i++)
2114 classes[i] =
2115 merge_classes (X86_64_INTEGER_CLASS,
2116 classes[i]);
2118 else
2120 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2121 TREE_TYPE (field), subclasses,
2122 (int_bit_position (field)
2123 + bit_offset) % 256);
2124 if (!num)
2125 return 0;
2126 for (i = 0; i < num; i++)
2128 int pos =
2129 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2130 classes[i + pos] =
2131 merge_classes (subclasses[i], classes[i + pos]);
2137 /* Arrays are handled as small records. */
2138 else if (TREE_CODE (type) == ARRAY_TYPE)
2140 int num;
2141 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2142 TREE_TYPE (type), subclasses, bit_offset);
2143 if (!num)
2144 return 0;
2146 /* The partial classes are now full classes. */
2147 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2148 subclasses[0] = X86_64_SSE_CLASS;
2149 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2150 subclasses[0] = X86_64_INTEGER_CLASS;
2152 for (i = 0; i < words; i++)
2153 classes[i] = subclasses[i % num];
2155 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2156 else if (TREE_CODE (type) == UNION_TYPE
2157 || TREE_CODE (type) == QUAL_UNION_TYPE)
2159 /* For classes first merge in the field of the subclasses. */
2160 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2162 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2163 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2164 int i;
2166 for (i = 0; i < n_bases; ++i)
2168 tree binfo = TREE_VEC_ELT (bases, i);
2169 int num;
2170 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2171 tree type = BINFO_TYPE (binfo);
2173 num = classify_argument (TYPE_MODE (type),
2174 type, subclasses,
2175 (offset + (bit_offset % 64)) % 256);
2176 if (!num)
2177 return 0;
2178 for (i = 0; i < num; i++)
2180 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2181 classes[i + pos] =
2182 merge_classes (subclasses[i], classes[i + pos]);
2186 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2188 if (TREE_CODE (field) == FIELD_DECL)
2190 int num;
2191 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2192 TREE_TYPE (field), subclasses,
2193 bit_offset);
2194 if (!num)
2195 return 0;
2196 for (i = 0; i < num; i++)
2197 classes[i] = merge_classes (subclasses[i], classes[i]);
2201 else if (TREE_CODE (type) == SET_TYPE)
2203 if (bytes <= 4)
2205 classes[0] = X86_64_INTEGERSI_CLASS;
2206 return 1;
2208 else if (bytes <= 8)
2210 classes[0] = X86_64_INTEGER_CLASS;
2211 return 1;
2213 else if (bytes <= 12)
2215 classes[0] = X86_64_INTEGER_CLASS;
2216 classes[1] = X86_64_INTEGERSI_CLASS;
2217 return 2;
2219 else
2221 classes[0] = X86_64_INTEGER_CLASS;
2222 classes[1] = X86_64_INTEGER_CLASS;
2223 return 2;
2226 else
2227 abort ();
2229 /* Final merger cleanup. */
2230 for (i = 0; i < words; i++)
2232 /* If one class is MEMORY, everything should be passed in
2233 memory. */
2234 if (classes[i] == X86_64_MEMORY_CLASS)
2235 return 0;
2237 /* The X86_64_SSEUP_CLASS should be always preceded by
2238 X86_64_SSE_CLASS. */
2239 if (classes[i] == X86_64_SSEUP_CLASS
2240 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2241 classes[i] = X86_64_SSE_CLASS;
2243 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2244 if (classes[i] == X86_64_X87UP_CLASS
2245 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2246 classes[i] = X86_64_SSE_CLASS;
2248 return words;
2251 /* Compute alignment needed. We align all types to natural boundaries with
2252 exception of XFmode that is aligned to 64bits. */
2253 if (mode != VOIDmode && mode != BLKmode)
2255 int mode_alignment = GET_MODE_BITSIZE (mode);
2257 if (mode == XFmode)
2258 mode_alignment = 128;
2259 else if (mode == XCmode)
2260 mode_alignment = 256;
2261 if (COMPLEX_MODE_P (mode))
2262 mode_alignment /= 2;
2263 /* Misaligned fields are always returned in memory. */
2264 if (bit_offset % mode_alignment)
2265 return 0;
2268 /* for V1xx modes, just use the base mode */
2269 if (VECTOR_MODE_P (mode)
2270 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2271 mode = GET_MODE_INNER (mode);
2273 /* Classification of atomic types. */
2274 switch (mode)
2276 case DImode:
2277 case SImode:
2278 case HImode:
2279 case QImode:
2280 case CSImode:
2281 case CHImode:
2282 case CQImode:
2283 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2284 classes[0] = X86_64_INTEGERSI_CLASS;
2285 else
2286 classes[0] = X86_64_INTEGER_CLASS;
2287 return 1;
2288 case CDImode:
2289 case TImode:
2290 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2291 return 2;
2292 case CTImode:
2293 return 0;
2294 case SFmode:
2295 if (!(bit_offset % 64))
2296 classes[0] = X86_64_SSESF_CLASS;
2297 else
2298 classes[0] = X86_64_SSE_CLASS;
2299 return 1;
2300 case DFmode:
2301 classes[0] = X86_64_SSEDF_CLASS;
2302 return 1;
2303 case XFmode:
2304 classes[0] = X86_64_X87_CLASS;
2305 classes[1] = X86_64_X87UP_CLASS;
2306 return 2;
2307 case TFmode:
2308 classes[0] = X86_64_SSE_CLASS;
2309 classes[1] = X86_64_SSEUP_CLASS;
2310 return 2;
2311 case SCmode:
2312 classes[0] = X86_64_SSE_CLASS;
2313 return 1;
2314 case DCmode:
2315 classes[0] = X86_64_SSEDF_CLASS;
2316 classes[1] = X86_64_SSEDF_CLASS;
2317 return 2;
2318 case XCmode:
2319 case TCmode:
2320 /* These modes are larger than 16 bytes. */
2321 return 0;
2322 case V4SFmode:
2323 case V4SImode:
2324 case V16QImode:
2325 case V8HImode:
2326 case V2DFmode:
2327 case V2DImode:
2328 classes[0] = X86_64_SSE_CLASS;
2329 classes[1] = X86_64_SSEUP_CLASS;
2330 return 2;
2331 case V2SFmode:
2332 case V2SImode:
2333 case V4HImode:
2334 case V8QImode:
2335 classes[0] = X86_64_SSE_CLASS;
2336 return 1;
2337 case BLKmode:
2338 case VOIDmode:
2339 return 0;
2340 default:
2341 if (VECTOR_MODE_P (mode))
2343 if (bytes > 16)
2344 return 0;
2345 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2347 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2348 classes[0] = X86_64_INTEGERSI_CLASS;
2349 else
2350 classes[0] = X86_64_INTEGER_CLASS;
2351 classes[1] = X86_64_INTEGER_CLASS;
2352 return 1 + (bytes > 8);
2355 abort ();
2359 /* Examine the argument and return set number of register required in each
2360 class. Return 0 iff parameter should be passed in memory. */
2361 static int
2362 examine_argument (enum machine_mode mode, tree type, int in_return,
2363 int *int_nregs, int *sse_nregs)
2365 enum x86_64_reg_class class[MAX_CLASSES];
2366 int n = classify_argument (mode, type, class, 0);
2368 *int_nregs = 0;
2369 *sse_nregs = 0;
2370 if (!n)
2371 return 0;
2372 for (n--; n >= 0; n--)
2373 switch (class[n])
2375 case X86_64_INTEGER_CLASS:
2376 case X86_64_INTEGERSI_CLASS:
2377 (*int_nregs)++;
2378 break;
2379 case X86_64_SSE_CLASS:
2380 case X86_64_SSESF_CLASS:
2381 case X86_64_SSEDF_CLASS:
2382 (*sse_nregs)++;
2383 break;
2384 case X86_64_NO_CLASS:
2385 case X86_64_SSEUP_CLASS:
2386 break;
2387 case X86_64_X87_CLASS:
2388 case X86_64_X87UP_CLASS:
2389 if (!in_return)
2390 return 0;
2391 break;
2392 case X86_64_MEMORY_CLASS:
2393 abort ();
2395 return 1;
2397 /* Construct container for the argument used by GCC interface. See
2398 FUNCTION_ARG for the detailed description. */
2399 static rtx
2400 construct_container (enum machine_mode mode, tree type, int in_return,
2401 int nintregs, int nsseregs, const int * intreg,
2402 int sse_regno)
2404 enum machine_mode tmpmode;
2405 int bytes =
2406 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2407 enum x86_64_reg_class class[MAX_CLASSES];
2408 int n;
2409 int i;
2410 int nexps = 0;
2411 int needed_sseregs, needed_intregs;
2412 rtx exp[MAX_CLASSES];
2413 rtx ret;
2415 n = classify_argument (mode, type, class, 0);
2416 if (TARGET_DEBUG_ARG)
2418 if (!n)
2419 fprintf (stderr, "Memory class\n");
2420 else
2422 fprintf (stderr, "Classes:");
2423 for (i = 0; i < n; i++)
2425 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2427 fprintf (stderr, "\n");
2430 if (!n)
2431 return NULL;
2432 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2433 return NULL;
2434 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2435 return NULL;
2437 /* First construct simple cases. Avoid SCmode, since we want to use
2438 single register to pass this type. */
2439 if (n == 1 && mode != SCmode)
2440 switch (class[0])
2442 case X86_64_INTEGER_CLASS:
2443 case X86_64_INTEGERSI_CLASS:
2444 return gen_rtx_REG (mode, intreg[0]);
2445 case X86_64_SSE_CLASS:
2446 case X86_64_SSESF_CLASS:
2447 case X86_64_SSEDF_CLASS:
2448 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2449 case X86_64_X87_CLASS:
2450 return gen_rtx_REG (mode, FIRST_STACK_REG);
2451 case X86_64_NO_CLASS:
2452 /* Zero sized array, struct or class. */
2453 return NULL;
2454 default:
2455 abort ();
2457 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2458 && mode != BLKmode)
2459 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2460 if (n == 2
2461 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2462 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2463 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2464 && class[1] == X86_64_INTEGER_CLASS
2465 && (mode == CDImode || mode == TImode || mode == TFmode)
2466 && intreg[0] + 1 == intreg[1])
2467 return gen_rtx_REG (mode, intreg[0]);
2468 if (n == 4
2469 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2470 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2471 && mode != BLKmode)
2472 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2474 /* Otherwise figure out the entries of the PARALLEL. */
2475 for (i = 0; i < n; i++)
2477 switch (class[i])
2479 case X86_64_NO_CLASS:
2480 break;
2481 case X86_64_INTEGER_CLASS:
2482 case X86_64_INTEGERSI_CLASS:
2483 /* Merge TImodes on aligned occasions here too. */
2484 if (i * 8 + 8 > bytes)
2485 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2486 else if (class[i] == X86_64_INTEGERSI_CLASS)
2487 tmpmode = SImode;
2488 else
2489 tmpmode = DImode;
2490 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2491 if (tmpmode == BLKmode)
2492 tmpmode = DImode;
2493 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2494 gen_rtx_REG (tmpmode, *intreg),
2495 GEN_INT (i*8));
2496 intreg++;
2497 break;
2498 case X86_64_SSESF_CLASS:
2499 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2500 gen_rtx_REG (SFmode,
2501 SSE_REGNO (sse_regno)),
2502 GEN_INT (i*8));
2503 sse_regno++;
2504 break;
2505 case X86_64_SSEDF_CLASS:
2506 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2507 gen_rtx_REG (DFmode,
2508 SSE_REGNO (sse_regno)),
2509 GEN_INT (i*8));
2510 sse_regno++;
2511 break;
2512 case X86_64_SSE_CLASS:
2513 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2514 tmpmode = TImode;
2515 else
2516 tmpmode = DImode;
2517 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2518 gen_rtx_REG (tmpmode,
2519 SSE_REGNO (sse_regno)),
2520 GEN_INT (i*8));
2521 if (tmpmode == TImode)
2522 i++;
2523 sse_regno++;
2524 break;
2525 default:
2526 abort ();
2529 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2530 for (i = 0; i < nexps; i++)
2531 XVECEXP (ret, 0, i) = exp [i];
2532 return ret;
2535 /* Update the data in CUM to advance over an argument
2536 of mode MODE and data type TYPE.
2537 (TYPE is null for libcalls where that information may not be available.) */
2539 void
2540 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2541 enum machine_mode mode, /* current arg mode */
2542 tree type, /* type of the argument or 0 if lib support */
2543 int named) /* whether or not the argument was named */
2545 int bytes =
2546 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2547 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2549 if (TARGET_DEBUG_ARG)
2550 fprintf (stderr,
2551 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2552 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2553 if (TARGET_64BIT)
2555 int int_nregs, sse_nregs;
2556 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2557 cum->words += words;
2558 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2560 cum->nregs -= int_nregs;
2561 cum->sse_nregs -= sse_nregs;
2562 cum->regno += int_nregs;
2563 cum->sse_regno += sse_nregs;
2565 else
2566 cum->words += words;
2568 else
2570 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2571 && (!type || !AGGREGATE_TYPE_P (type)))
2573 cum->sse_words += words;
2574 cum->sse_nregs -= 1;
2575 cum->sse_regno += 1;
2576 if (cum->sse_nregs <= 0)
2578 cum->sse_nregs = 0;
2579 cum->sse_regno = 0;
2582 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2583 && (!type || !AGGREGATE_TYPE_P (type)))
2585 cum->mmx_words += words;
2586 cum->mmx_nregs -= 1;
2587 cum->mmx_regno += 1;
2588 if (cum->mmx_nregs <= 0)
2590 cum->mmx_nregs = 0;
2591 cum->mmx_regno = 0;
2594 else
2596 cum->words += words;
2597 cum->nregs -= words;
2598 cum->regno += words;
2600 if (cum->nregs <= 0)
2602 cum->nregs = 0;
2603 cum->regno = 0;
2607 return;
2610 /* Define where to put the arguments to a function.
2611 Value is zero to push the argument on the stack,
2612 or a hard register in which to store the argument.
2614 MODE is the argument's machine mode.
2615 TYPE is the data type of the argument (as a tree).
2616 This is null for libcalls where that information may
2617 not be available.
2618 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2619 the preceding args and about the function being called.
2620 NAMED is nonzero if this argument is a named parameter
2621 (otherwise it is an extra parameter matching an ellipsis). */
2624 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2625 enum machine_mode mode, /* current arg mode */
2626 tree type, /* type of the argument or 0 if lib support */
2627 int named) /* != 0 for normal args, == 0 for ... args */
2629 rtx ret = NULL_RTX;
2630 int bytes =
2631 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2632 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2633 static bool warnedsse, warnedmmx;
2635 /* Handle a hidden AL argument containing number of registers for varargs
2636 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2637 any AL settings. */
2638 if (mode == VOIDmode)
2640 if (TARGET_64BIT)
2641 return GEN_INT (cum->maybe_vaarg
2642 ? (cum->sse_nregs < 0
2643 ? SSE_REGPARM_MAX
2644 : cum->sse_regno)
2645 : -1);
2646 else
2647 return constm1_rtx;
2649 if (TARGET_64BIT)
2650 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2651 &x86_64_int_parameter_registers [cum->regno],
2652 cum->sse_regno);
2653 else
2654 switch (mode)
2656 /* For now, pass fp/complex values on the stack. */
2657 default:
2658 break;
2660 case BLKmode:
2661 if (bytes < 0)
2662 break;
2663 /* FALLTHRU */
2664 case DImode:
2665 case SImode:
2666 case HImode:
2667 case QImode:
2668 if (words <= cum->nregs)
2670 int regno = cum->regno;
2672 /* Fastcall allocates the first two DWORD (SImode) or
2673 smaller arguments to ECX and EDX. */
2674 if (cum->fastcall)
2676 if (mode == BLKmode || mode == DImode)
2677 break;
2679 /* ECX not EAX is the first allocated register. */
2680 if (regno == 0)
2681 regno = 2;
2683 ret = gen_rtx_REG (mode, regno);
2685 break;
2686 case TImode:
2687 case V16QImode:
2688 case V8HImode:
2689 case V4SImode:
2690 case V2DImode:
2691 case V4SFmode:
2692 case V2DFmode:
2693 if (!type || !AGGREGATE_TYPE_P (type))
2695 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2697 warnedsse = true;
2698 warning ("SSE vector argument without SSE enabled "
2699 "changes the ABI");
2701 if (cum->sse_nregs)
2702 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2704 break;
2705 case V8QImode:
2706 case V4HImode:
2707 case V2SImode:
2708 case V2SFmode:
2709 if (!type || !AGGREGATE_TYPE_P (type))
2711 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2713 warnedmmx = true;
2714 warning ("MMX vector argument without MMX enabled "
2715 "changes the ABI");
2717 if (cum->mmx_nregs)
2718 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2720 break;
2723 if (TARGET_DEBUG_ARG)
2725 fprintf (stderr,
2726 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2727 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2729 if (ret)
2730 print_simple_rtl (stderr, ret);
2731 else
2732 fprintf (stderr, ", stack");
2734 fprintf (stderr, " )\n");
2737 return ret;
2740 /* A C expression that indicates when an argument must be passed by
2741 reference. If nonzero for an argument, a copy of that argument is
2742 made in memory and a pointer to the argument is passed instead of
2743 the argument itself. The pointer is passed in whatever way is
2744 appropriate for passing a pointer to that type. */
2747 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2748 enum machine_mode mode ATTRIBUTE_UNUSED,
2749 tree type, int named ATTRIBUTE_UNUSED)
2751 if (!TARGET_64BIT)
2752 return 0;
2754 if (type && int_size_in_bytes (type) == -1)
2756 if (TARGET_DEBUG_ARG)
2757 fprintf (stderr, "function_arg_pass_by_reference\n");
2758 return 1;
2761 return 0;
2764 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2765 ABI */
2766 static bool
2767 contains_128bit_aligned_vector_p (tree type)
2769 enum machine_mode mode = TYPE_MODE (type);
2770 if (SSE_REG_MODE_P (mode)
2771 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2772 return true;
2773 if (TYPE_ALIGN (type) < 128)
2774 return false;
2776 if (AGGREGATE_TYPE_P (type))
2778 /* Walk the aggregates recursively. */
2779 if (TREE_CODE (type) == RECORD_TYPE
2780 || TREE_CODE (type) == UNION_TYPE
2781 || TREE_CODE (type) == QUAL_UNION_TYPE)
2783 tree field;
2785 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2787 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2788 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2789 int i;
2791 for (i = 0; i < n_bases; ++i)
2793 tree binfo = TREE_VEC_ELT (bases, i);
2794 tree type = BINFO_TYPE (binfo);
2796 if (contains_128bit_aligned_vector_p (type))
2797 return true;
2800 /* And now merge the fields of structure. */
2801 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2803 if (TREE_CODE (field) == FIELD_DECL
2804 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2805 return true;
2808 /* Just for use if some languages passes arrays by value. */
2809 else if (TREE_CODE (type) == ARRAY_TYPE)
2811 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2812 return true;
2814 else
2815 abort ();
2817 return false;
2820 /* Gives the alignment boundary, in bits, of an argument with the
2821 specified mode and type. */
2824 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2826 int align;
2827 if (type)
2828 align = TYPE_ALIGN (type);
2829 else
2830 align = GET_MODE_ALIGNMENT (mode);
2831 if (align < PARM_BOUNDARY)
2832 align = PARM_BOUNDARY;
2833 if (!TARGET_64BIT)
2835 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2836 make an exception for SSE modes since these require 128bit
2837 alignment.
2839 The handling here differs from field_alignment. ICC aligns MMX
2840 arguments to 4 byte boundaries, while structure fields are aligned
2841 to 8 byte boundaries. */
2842 if (!type)
2844 if (!SSE_REG_MODE_P (mode))
2845 align = PARM_BOUNDARY;
2847 else
2849 if (!contains_128bit_aligned_vector_p (type))
2850 align = PARM_BOUNDARY;
2853 if (align > 128)
2854 align = 128;
2855 return align;
2858 /* Return true if N is a possible register number of function value. */
2859 bool
2860 ix86_function_value_regno_p (int regno)
2862 if (!TARGET_64BIT)
2864 return ((regno) == 0
2865 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2866 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2868 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2869 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2870 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2873 /* Define how to find the value returned by a function.
2874 VALTYPE is the data type of the value (as a tree).
2875 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2876 otherwise, FUNC is 0. */
2878 ix86_function_value (tree valtype)
2880 if (TARGET_64BIT)
2882 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2883 REGPARM_MAX, SSE_REGPARM_MAX,
2884 x86_64_int_return_registers, 0);
2885 /* For zero sized structures, construct_container return NULL, but we need
2886 to keep rest of compiler happy by returning meaningful value. */
2887 if (!ret)
2888 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2889 return ret;
2891 else
2892 return gen_rtx_REG (TYPE_MODE (valtype),
2893 ix86_value_regno (TYPE_MODE (valtype)));
2896 /* Return false iff type is returned in memory. */
2898 ix86_return_in_memory (tree type)
2900 int needed_intregs, needed_sseregs, size;
2901 enum machine_mode mode = TYPE_MODE (type);
2903 if (TARGET_64BIT)
2904 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2906 if (mode == BLKmode)
2907 return 1;
2909 size = int_size_in_bytes (type);
2911 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2912 return 0;
2914 if (VECTOR_MODE_P (mode) || mode == TImode)
2916 /* User-created vectors small enough to fit in EAX. */
2917 if (size < 8)
2918 return 0;
2920 /* MMX/3dNow values are returned on the stack, since we've
2921 got to EMMS/FEMMS before returning. */
2922 if (size == 8)
2923 return 1;
2925 /* SSE values are returned in XMM0, except when it doesn't exist. */
2926 if (size == 16)
2927 return (TARGET_SSE ? 0 : 1);
2930 if (mode == XFmode)
2931 return 0;
2933 if (size > 12)
2934 return 1;
2935 return 0;
2938 /* When returning SSE vector types, we have a choice of either
2939 (1) being abi incompatible with a -march switch, or
2940 (2) generating an error.
2941 Given no good solution, I think the safest thing is one warning.
2942 The user won't be able to use -Werror, but....
2944 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2945 called in response to actually generating a caller or callee that
2946 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2947 via aggregate_value_p for general type probing from tree-ssa. */
2949 static rtx
2950 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2952 static bool warned;
2954 if (!TARGET_SSE && type && !warned)
2956 /* Look at the return type of the function, not the function type. */
2957 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2959 if (mode == TImode
2960 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2962 warned = true;
2963 warning ("SSE vector return without SSE enabled changes the ABI");
2967 return NULL;
2970 /* Define how to find the value returned by a library function
2971 assuming the value has mode MODE. */
2973 ix86_libcall_value (enum machine_mode mode)
2975 if (TARGET_64BIT)
2977 switch (mode)
2979 case SFmode:
2980 case SCmode:
2981 case DFmode:
2982 case DCmode:
2983 case TFmode:
2984 return gen_rtx_REG (mode, FIRST_SSE_REG);
2985 case XFmode:
2986 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2987 case XCmode:
2988 case TCmode:
2989 return NULL;
2990 default:
2991 return gen_rtx_REG (mode, 0);
2994 else
2995 return gen_rtx_REG (mode, ix86_value_regno (mode));
2998 /* Given a mode, return the register to use for a return value. */
3000 static int
3001 ix86_value_regno (enum machine_mode mode)
3003 /* Floating point return values in %st(0). */
3004 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3005 return FIRST_FLOAT_REG;
3006 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3007 we prevent this case when sse is not available. */
3008 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3009 return FIRST_SSE_REG;
3010 /* Everything else in %eax. */
3011 return 0;
3014 /* Create the va_list data type. */
3016 static tree
3017 ix86_build_builtin_va_list (void)
3019 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3021 /* For i386 we use plain pointer to argument area. */
3022 if (!TARGET_64BIT)
3023 return build_pointer_type (char_type_node);
3025 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3026 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3028 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3029 unsigned_type_node);
3030 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3031 unsigned_type_node);
3032 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3033 ptr_type_node);
3034 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3035 ptr_type_node);
3037 DECL_FIELD_CONTEXT (f_gpr) = record;
3038 DECL_FIELD_CONTEXT (f_fpr) = record;
3039 DECL_FIELD_CONTEXT (f_ovf) = record;
3040 DECL_FIELD_CONTEXT (f_sav) = record;
3042 TREE_CHAIN (record) = type_decl;
3043 TYPE_NAME (record) = type_decl;
3044 TYPE_FIELDS (record) = f_gpr;
3045 TREE_CHAIN (f_gpr) = f_fpr;
3046 TREE_CHAIN (f_fpr) = f_ovf;
3047 TREE_CHAIN (f_ovf) = f_sav;
3049 layout_type (record);
3051 /* The correct type is an array type of one element. */
3052 return build_array_type (record, build_index_type (size_zero_node));
3055 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3057 static void
3058 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3059 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3060 int no_rtl)
3062 CUMULATIVE_ARGS next_cum;
3063 rtx save_area = NULL_RTX, mem;
3064 rtx label;
3065 rtx label_ref;
3066 rtx tmp_reg;
3067 rtx nsse_reg;
3068 int set;
3069 tree fntype;
3070 int stdarg_p;
3071 int i;
3073 if (!TARGET_64BIT)
3074 return;
3076 /* Indicate to allocate space on the stack for varargs save area. */
3077 ix86_save_varrargs_registers = 1;
3079 cfun->stack_alignment_needed = 128;
3081 fntype = TREE_TYPE (current_function_decl);
3082 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3083 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3084 != void_type_node));
3086 /* For varargs, we do not want to skip the dummy va_dcl argument.
3087 For stdargs, we do want to skip the last named argument. */
3088 next_cum = *cum;
3089 if (stdarg_p)
3090 function_arg_advance (&next_cum, mode, type, 1);
3092 if (!no_rtl)
3093 save_area = frame_pointer_rtx;
3095 set = get_varargs_alias_set ();
3097 for (i = next_cum.regno; i < ix86_regparm; i++)
3099 mem = gen_rtx_MEM (Pmode,
3100 plus_constant (save_area, i * UNITS_PER_WORD));
3101 set_mem_alias_set (mem, set);
3102 emit_move_insn (mem, gen_rtx_REG (Pmode,
3103 x86_64_int_parameter_registers[i]));
3106 if (next_cum.sse_nregs)
3108 /* Now emit code to save SSE registers. The AX parameter contains number
3109 of SSE parameter registers used to call this function. We use
3110 sse_prologue_save insn template that produces computed jump across
3111 SSE saves. We need some preparation work to get this working. */
3113 label = gen_label_rtx ();
3114 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3116 /* Compute address to jump to :
3117 label - 5*eax + nnamed_sse_arguments*5 */
3118 tmp_reg = gen_reg_rtx (Pmode);
3119 nsse_reg = gen_reg_rtx (Pmode);
3120 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3121 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3122 gen_rtx_MULT (Pmode, nsse_reg,
3123 GEN_INT (4))));
3124 if (next_cum.sse_regno)
3125 emit_move_insn
3126 (nsse_reg,
3127 gen_rtx_CONST (DImode,
3128 gen_rtx_PLUS (DImode,
3129 label_ref,
3130 GEN_INT (next_cum.sse_regno * 4))));
3131 else
3132 emit_move_insn (nsse_reg, label_ref);
3133 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3135 /* Compute address of memory block we save into. We always use pointer
3136 pointing 127 bytes after first byte to store - this is needed to keep
3137 instruction size limited by 4 bytes. */
3138 tmp_reg = gen_reg_rtx (Pmode);
3139 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3140 plus_constant (save_area,
3141 8 * REGPARM_MAX + 127)));
3142 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3143 set_mem_alias_set (mem, set);
3144 set_mem_align (mem, BITS_PER_WORD);
3146 /* And finally do the dirty job! */
3147 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3148 GEN_INT (next_cum.sse_regno), label));
3153 /* Implement va_start. */
3155 void
3156 ix86_va_start (tree valist, rtx nextarg)
3158 HOST_WIDE_INT words, n_gpr, n_fpr;
3159 tree f_gpr, f_fpr, f_ovf, f_sav;
3160 tree gpr, fpr, ovf, sav, t;
3162 /* Only 64bit target needs something special. */
3163 if (!TARGET_64BIT)
3165 std_expand_builtin_va_start (valist, nextarg);
3166 return;
3169 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3170 f_fpr = TREE_CHAIN (f_gpr);
3171 f_ovf = TREE_CHAIN (f_fpr);
3172 f_sav = TREE_CHAIN (f_ovf);
3174 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3175 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3176 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3177 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3178 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3180 /* Count number of gp and fp argument registers used. */
3181 words = current_function_args_info.words;
3182 n_gpr = current_function_args_info.regno;
3183 n_fpr = current_function_args_info.sse_regno;
3185 if (TARGET_DEBUG_ARG)
3186 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3187 (int) words, (int) n_gpr, (int) n_fpr);
3189 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3190 build_int_2 (n_gpr * 8, 0));
3191 TREE_SIDE_EFFECTS (t) = 1;
3192 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3194 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3195 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3196 TREE_SIDE_EFFECTS (t) = 1;
3197 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3199 /* Find the overflow area. */
3200 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3201 if (words != 0)
3202 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3203 build_int_2 (words * UNITS_PER_WORD, 0));
3204 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3205 TREE_SIDE_EFFECTS (t) = 1;
3206 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3208 /* Find the register save area.
3209 Prologue of the function save it right above stack frame. */
3210 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3211 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3212 TREE_SIDE_EFFECTS (t) = 1;
3213 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3216 /* Implement va_arg. */
3218 tree
3219 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3221 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3222 tree f_gpr, f_fpr, f_ovf, f_sav;
3223 tree gpr, fpr, ovf, sav, t;
3224 int size, rsize;
3225 tree lab_false, lab_over = NULL_TREE;
3226 tree addr, t2;
3227 rtx container;
3228 int indirect_p = 0;
3229 tree ptrtype;
3231 /* Only 64bit target needs something special. */
3232 if (!TARGET_64BIT)
3233 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3235 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3236 f_fpr = TREE_CHAIN (f_gpr);
3237 f_ovf = TREE_CHAIN (f_fpr);
3238 f_sav = TREE_CHAIN (f_ovf);
3240 valist = build_fold_indirect_ref (valist);
3241 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3242 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3243 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3244 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3246 size = int_size_in_bytes (type);
3247 if (size == -1)
3249 /* Variable-size types are passed by reference. */
3250 indirect_p = 1;
3251 type = build_pointer_type (type);
3252 size = int_size_in_bytes (type);
3254 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3256 container = construct_container (TYPE_MODE (type), type, 0,
3257 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3259 * Pull the value out of the saved registers ...
3262 addr = create_tmp_var (ptr_type_node, "addr");
3263 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3265 if (container)
3267 int needed_intregs, needed_sseregs;
3268 bool need_temp;
3269 tree int_addr, sse_addr;
3271 lab_false = create_artificial_label ();
3272 lab_over = create_artificial_label ();
3274 examine_argument (TYPE_MODE (type), type, 0,
3275 &needed_intregs, &needed_sseregs);
3277 need_temp = (!REG_P (container)
3278 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3279 || TYPE_ALIGN (type) > 128));
3281 /* In case we are passing structure, verify that it is consecutive block
3282 on the register save area. If not we need to do moves. */
3283 if (!need_temp && !REG_P (container))
3285 /* Verify that all registers are strictly consecutive */
3286 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3288 int i;
3290 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3292 rtx slot = XVECEXP (container, 0, i);
3293 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3294 || INTVAL (XEXP (slot, 1)) != i * 16)
3295 need_temp = 1;
3298 else
3300 int i;
3302 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3304 rtx slot = XVECEXP (container, 0, i);
3305 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3306 || INTVAL (XEXP (slot, 1)) != i * 8)
3307 need_temp = 1;
3311 if (!need_temp)
3313 int_addr = addr;
3314 sse_addr = addr;
3316 else
3318 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3319 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3320 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3321 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3323 /* First ensure that we fit completely in registers. */
3324 if (needed_intregs)
3326 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3327 TREE_TYPE (t) = TREE_TYPE (gpr);
3328 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3329 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3330 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3331 gimplify_and_add (t, pre_p);
3333 if (needed_sseregs)
3335 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3336 + REGPARM_MAX * 8, 0);
3337 TREE_TYPE (t) = TREE_TYPE (fpr);
3338 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3339 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3340 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3341 gimplify_and_add (t, pre_p);
3344 /* Compute index to start of area used for integer regs. */
3345 if (needed_intregs)
3347 /* int_addr = gpr + sav; */
3348 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3349 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3350 gimplify_and_add (t, pre_p);
3352 if (needed_sseregs)
3354 /* sse_addr = fpr + sav; */
3355 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3356 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3357 gimplify_and_add (t, pre_p);
3359 if (need_temp)
3361 int i;
3362 tree temp = create_tmp_var (type, "va_arg_tmp");
3364 /* addr = &temp; */
3365 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3366 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3367 gimplify_and_add (t, pre_p);
3369 for (i = 0; i < XVECLEN (container, 0); i++)
3371 rtx slot = XVECEXP (container, 0, i);
3372 rtx reg = XEXP (slot, 0);
3373 enum machine_mode mode = GET_MODE (reg);
3374 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3375 tree addr_type = build_pointer_type (piece_type);
3376 tree src_addr, src;
3377 int src_offset;
3378 tree dest_addr, dest;
3380 if (SSE_REGNO_P (REGNO (reg)))
3382 src_addr = sse_addr;
3383 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3385 else
3387 src_addr = int_addr;
3388 src_offset = REGNO (reg) * 8;
3390 src_addr = fold_convert (addr_type, src_addr);
3391 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3392 size_int (src_offset)));
3393 src = build_fold_indirect_ref (src_addr);
3395 dest_addr = fold_convert (addr_type, addr);
3396 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3397 size_int (INTVAL (XEXP (slot, 1)))));
3398 dest = build_fold_indirect_ref (dest_addr);
3400 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3401 gimplify_and_add (t, pre_p);
3405 if (needed_intregs)
3407 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3408 build_int_2 (needed_intregs * 8, 0));
3409 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3410 gimplify_and_add (t, pre_p);
3412 if (needed_sseregs)
3415 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3416 build_int_2 (needed_sseregs * 16, 0));
3417 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3418 gimplify_and_add (t, pre_p);
3421 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3422 gimplify_and_add (t, pre_p);
3424 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3425 append_to_statement_list (t, pre_p);
3428 /* ... otherwise out of the overflow area. */
3430 /* Care for on-stack alignment if needed. */
3431 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3432 t = ovf;
3433 else
3435 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3436 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3437 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3439 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3441 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3442 gimplify_and_add (t2, pre_p);
3444 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3445 build_int_2 (rsize * UNITS_PER_WORD, 0));
3446 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3447 gimplify_and_add (t, pre_p);
3449 if (container)
3451 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3452 append_to_statement_list (t, pre_p);
3455 ptrtype = build_pointer_type (type);
3456 addr = fold_convert (ptrtype, addr);
3458 if (indirect_p)
3459 addr = build_fold_indirect_ref (addr);
3460 return build_fold_indirect_ref (addr);
3463 /* Return nonzero if OP is either a i387 or SSE fp register. */
3465 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3467 return ANY_FP_REG_P (op);
3470 /* Return nonzero if OP is an i387 fp register. */
3472 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3474 return FP_REG_P (op);
3477 /* Return nonzero if OP is a non-fp register_operand. */
3479 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3481 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3484 /* Return nonzero if OP is a register operand other than an
3485 i387 fp register. */
3487 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3489 return register_operand (op, mode) && !FP_REG_P (op);
3492 /* Return nonzero if OP is general operand representable on x86_64. */
3495 x86_64_general_operand (rtx op, enum machine_mode mode)
3497 if (!TARGET_64BIT)
3498 return general_operand (op, mode);
3499 if (nonimmediate_operand (op, mode))
3500 return 1;
3501 return x86_64_sign_extended_value (op);
3504 /* Return nonzero if OP is general operand representable on x86_64
3505 as either sign extended or zero extended constant. */
3508 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3510 if (!TARGET_64BIT)
3511 return general_operand (op, mode);
3512 if (nonimmediate_operand (op, mode))
3513 return 1;
3514 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3517 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3520 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3522 if (!TARGET_64BIT)
3523 return nonmemory_operand (op, mode);
3524 if (register_operand (op, mode))
3525 return 1;
3526 return x86_64_sign_extended_value (op);
3529 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3532 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3534 if (!TARGET_64BIT || !flag_pic)
3535 return nonmemory_operand (op, mode);
3536 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3537 return 1;
3538 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3539 return 1;
3540 return 0;
3543 /* Return nonzero if OPNUM's MEM should be matched
3544 in movabs* patterns. */
3547 ix86_check_movabs (rtx insn, int opnum)
3549 rtx set, mem;
3551 set = PATTERN (insn);
3552 if (GET_CODE (set) == PARALLEL)
3553 set = XVECEXP (set, 0, 0);
3554 if (GET_CODE (set) != SET)
3555 abort ();
3556 mem = XEXP (set, opnum);
3557 while (GET_CODE (mem) == SUBREG)
3558 mem = SUBREG_REG (mem);
3559 if (GET_CODE (mem) != MEM)
3560 abort ();
3561 return (volatile_ok || !MEM_VOLATILE_P (mem));
3564 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3567 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3569 if (!TARGET_64BIT)
3570 return nonmemory_operand (op, mode);
3571 if (register_operand (op, mode))
3572 return 1;
3573 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3576 /* Return nonzero if OP is immediate operand representable on x86_64. */
3579 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3581 if (!TARGET_64BIT)
3582 return immediate_operand (op, mode);
3583 return x86_64_sign_extended_value (op);
3586 /* Return nonzero if OP is immediate operand representable on x86_64. */
3589 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3591 return x86_64_zero_extended_value (op);
3594 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3595 for shift & compare patterns, as shifting by 0 does not change flags),
3596 else return zero. */
3599 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3601 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3604 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3605 reference and a constant. */
3608 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3610 switch (GET_CODE (op))
3612 case SYMBOL_REF:
3613 case LABEL_REF:
3614 return 1;
3616 case CONST:
3617 op = XEXP (op, 0);
3618 if (GET_CODE (op) == SYMBOL_REF
3619 || GET_CODE (op) == LABEL_REF
3620 || (GET_CODE (op) == UNSPEC
3621 && (XINT (op, 1) == UNSPEC_GOT
3622 || XINT (op, 1) == UNSPEC_GOTOFF
3623 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3624 return 1;
3625 if (GET_CODE (op) != PLUS
3626 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3627 return 0;
3629 op = XEXP (op, 0);
3630 if (GET_CODE (op) == SYMBOL_REF
3631 || GET_CODE (op) == LABEL_REF)
3632 return 1;
3633 /* Only @GOTOFF gets offsets. */
3634 if (GET_CODE (op) != UNSPEC
3635 || XINT (op, 1) != UNSPEC_GOTOFF)
3636 return 0;
3638 op = XVECEXP (op, 0, 0);
3639 if (GET_CODE (op) == SYMBOL_REF
3640 || GET_CODE (op) == LABEL_REF)
3641 return 1;
3642 return 0;
3644 default:
3645 return 0;
3649 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3652 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3654 if (GET_CODE (op) != CONST)
3655 return 0;
3656 op = XEXP (op, 0);
3657 if (TARGET_64BIT)
3659 if (GET_CODE (op) == UNSPEC
3660 && XINT (op, 1) == UNSPEC_GOTPCREL)
3661 return 1;
3662 if (GET_CODE (op) == PLUS
3663 && GET_CODE (XEXP (op, 0)) == UNSPEC
3664 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3665 return 1;
3667 else
3669 if (GET_CODE (op) == UNSPEC)
3670 return 1;
3671 if (GET_CODE (op) != PLUS
3672 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3673 return 0;
3674 op = XEXP (op, 0);
3675 if (GET_CODE (op) == UNSPEC)
3676 return 1;
3678 return 0;
3681 /* Return true if OP is a symbolic operand that resolves locally. */
3683 static int
3684 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3686 if (GET_CODE (op) == CONST
3687 && GET_CODE (XEXP (op, 0)) == PLUS
3688 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3689 op = XEXP (XEXP (op, 0), 0);
3691 if (GET_CODE (op) == LABEL_REF)
3692 return 1;
3694 if (GET_CODE (op) != SYMBOL_REF)
3695 return 0;
3697 if (SYMBOL_REF_LOCAL_P (op))
3698 return 1;
3700 /* There is, however, a not insubstantial body of code in the rest of
3701 the compiler that assumes it can just stick the results of
3702 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3703 /* ??? This is a hack. Should update the body of the compiler to
3704 always create a DECL an invoke targetm.encode_section_info. */
3705 if (strncmp (XSTR (op, 0), internal_label_prefix,
3706 internal_label_prefix_len) == 0)
3707 return 1;
3709 return 0;
3712 /* Test for various thread-local symbols. */
3715 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3717 if (GET_CODE (op) != SYMBOL_REF)
3718 return 0;
3719 return SYMBOL_REF_TLS_MODEL (op);
3722 static inline int
3723 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3725 if (GET_CODE (op) != SYMBOL_REF)
3726 return 0;
3727 return SYMBOL_REF_TLS_MODEL (op) == kind;
3731 global_dynamic_symbolic_operand (rtx op,
3732 enum machine_mode mode ATTRIBUTE_UNUSED)
3734 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3738 local_dynamic_symbolic_operand (rtx op,
3739 enum machine_mode mode ATTRIBUTE_UNUSED)
3741 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3745 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3747 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3751 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3753 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3756 /* Test for a valid operand for a call instruction. Don't allow the
3757 arg pointer register or virtual regs since they may decay into
3758 reg + const, which the patterns can't handle. */
3761 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3763 /* Disallow indirect through a virtual register. This leads to
3764 compiler aborts when trying to eliminate them. */
3765 if (GET_CODE (op) == REG
3766 && (op == arg_pointer_rtx
3767 || op == frame_pointer_rtx
3768 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3769 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3770 return 0;
3772 /* Disallow `call 1234'. Due to varying assembler lameness this
3773 gets either rejected or translated to `call .+1234'. */
3774 if (GET_CODE (op) == CONST_INT)
3775 return 0;
3777 /* Explicitly allow SYMBOL_REF even if pic. */
3778 if (GET_CODE (op) == SYMBOL_REF)
3779 return 1;
3781 /* Otherwise we can allow any general_operand in the address. */
3782 return general_operand (op, Pmode);
3785 /* Test for a valid operand for a call instruction. Don't allow the
3786 arg pointer register or virtual regs since they may decay into
3787 reg + const, which the patterns can't handle. */
3790 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3792 /* Disallow indirect through a virtual register. This leads to
3793 compiler aborts when trying to eliminate them. */
3794 if (GET_CODE (op) == REG
3795 && (op == arg_pointer_rtx
3796 || op == frame_pointer_rtx
3797 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3798 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3799 return 0;
3801 /* Explicitly allow SYMBOL_REF even if pic. */
3802 if (GET_CODE (op) == SYMBOL_REF)
3803 return 1;
3805 /* Otherwise we can only allow register operands. */
3806 return register_operand (op, Pmode);
3810 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3812 if (GET_CODE (op) == CONST
3813 && GET_CODE (XEXP (op, 0)) == PLUS
3814 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3815 op = XEXP (XEXP (op, 0), 0);
3816 return GET_CODE (op) == SYMBOL_REF;
3819 /* Match exactly zero and one. */
3822 const0_operand (rtx op, enum machine_mode mode)
3824 return op == CONST0_RTX (mode);
3828 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3830 return op == const1_rtx;
3833 /* Match 2, 4, or 8. Used for leal multiplicands. */
3836 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3838 return (GET_CODE (op) == CONST_INT
3839 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3843 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3845 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3849 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3851 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3855 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3857 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3861 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3863 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3867 /* True if this is a constant appropriate for an increment or decrement. */
3870 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3872 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3873 registers, since carry flag is not set. */
3874 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3875 return 0;
3876 return op == const1_rtx || op == constm1_rtx;
3879 /* Return nonzero if OP is acceptable as operand of DImode shift
3880 expander. */
3883 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3885 if (TARGET_64BIT)
3886 return nonimmediate_operand (op, mode);
3887 else
3888 return register_operand (op, mode);
3891 /* Return false if this is the stack pointer, or any other fake
3892 register eliminable to the stack pointer. Otherwise, this is
3893 a register operand.
3895 This is used to prevent esp from being used as an index reg.
3896 Which would only happen in pathological cases. */
3899 reg_no_sp_operand (rtx op, enum machine_mode mode)
3901 rtx t = op;
3902 if (GET_CODE (t) == SUBREG)
3903 t = SUBREG_REG (t);
3904 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3905 return 0;
3907 return register_operand (op, mode);
3911 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3913 return MMX_REG_P (op);
3916 /* Return false if this is any eliminable register. Otherwise
3917 general_operand. */
3920 general_no_elim_operand (rtx op, enum machine_mode mode)
3922 rtx t = op;
3923 if (GET_CODE (t) == SUBREG)
3924 t = SUBREG_REG (t);
3925 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3926 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3927 || t == virtual_stack_dynamic_rtx)
3928 return 0;
3929 if (REG_P (t)
3930 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3931 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3932 return 0;
3934 return general_operand (op, mode);
3937 /* Return false if this is any eliminable register. Otherwise
3938 register_operand or const_int. */
3941 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3943 rtx t = op;
3944 if (GET_CODE (t) == SUBREG)
3945 t = SUBREG_REG (t);
3946 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3947 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3948 || t == virtual_stack_dynamic_rtx)
3949 return 0;
3951 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3954 /* Return false if this is any eliminable register or stack register,
3955 otherwise work like register_operand. */
3958 index_register_operand (rtx op, enum machine_mode mode)
3960 rtx t = op;
3961 if (GET_CODE (t) == SUBREG)
3962 t = SUBREG_REG (t);
3963 if (!REG_P (t))
3964 return 0;
3965 if (t == arg_pointer_rtx
3966 || t == frame_pointer_rtx
3967 || t == virtual_incoming_args_rtx
3968 || t == virtual_stack_vars_rtx
3969 || t == virtual_stack_dynamic_rtx
3970 || REGNO (t) == STACK_POINTER_REGNUM)
3971 return 0;
3973 return general_operand (op, mode);
3976 /* Return true if op is a Q_REGS class register. */
3979 q_regs_operand (rtx op, enum machine_mode mode)
3981 if (mode != VOIDmode && GET_MODE (op) != mode)
3982 return 0;
3983 if (GET_CODE (op) == SUBREG)
3984 op = SUBREG_REG (op);
3985 return ANY_QI_REG_P (op);
3988 /* Return true if op is an flags register. */
3991 flags_reg_operand (rtx op, enum machine_mode mode)
3993 if (mode != VOIDmode && GET_MODE (op) != mode)
3994 return 0;
3995 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3998 /* Return true if op is a NON_Q_REGS class register. */
4001 non_q_regs_operand (rtx op, enum machine_mode mode)
4003 if (mode != VOIDmode && GET_MODE (op) != mode)
4004 return 0;
4005 if (GET_CODE (op) == SUBREG)
4006 op = SUBREG_REG (op);
4007 return NON_QI_REG_P (op);
4011 zero_extended_scalar_load_operand (rtx op,
4012 enum machine_mode mode ATTRIBUTE_UNUSED)
4014 unsigned n_elts;
4015 if (GET_CODE (op) != MEM)
4016 return 0;
4017 op = maybe_get_pool_constant (op);
4018 if (!op)
4019 return 0;
4020 if (GET_CODE (op) != CONST_VECTOR)
4021 return 0;
4022 n_elts =
4023 (GET_MODE_SIZE (GET_MODE (op)) /
4024 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4025 for (n_elts--; n_elts > 0; n_elts--)
4027 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4028 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4029 return 0;
4031 return 1;
4034 /* Return 1 when OP is operand acceptable for standard SSE move. */
4036 vector_move_operand (rtx op, enum machine_mode mode)
4038 if (nonimmediate_operand (op, mode))
4039 return 1;
4040 if (GET_MODE (op) != mode && mode != VOIDmode)
4041 return 0;
4042 return (op == CONST0_RTX (GET_MODE (op)));
4045 /* Return true if op if a valid address, and does not contain
4046 a segment override. */
4049 no_seg_address_operand (rtx op, enum machine_mode mode)
4051 struct ix86_address parts;
4053 if (! address_operand (op, mode))
4054 return 0;
4056 if (! ix86_decompose_address (op, &parts))
4057 abort ();
4059 return parts.seg == SEG_DEFAULT;
4062 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4063 insns. */
4065 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4067 enum rtx_code code = GET_CODE (op);
4068 switch (code)
4070 /* Operations supported directly. */
4071 case EQ:
4072 case LT:
4073 case LE:
4074 case UNORDERED:
4075 case NE:
4076 case UNGE:
4077 case UNGT:
4078 case ORDERED:
4079 return 1;
4080 /* These are equivalent to ones above in non-IEEE comparisons. */
4081 case UNEQ:
4082 case UNLT:
4083 case UNLE:
4084 case LTGT:
4085 case GE:
4086 case GT:
4087 return !TARGET_IEEE_FP;
4088 default:
4089 return 0;
4092 /* Return 1 if OP is a valid comparison operator in valid mode. */
4094 ix86_comparison_operator (rtx op, enum machine_mode mode)
4096 enum machine_mode inmode;
4097 enum rtx_code code = GET_CODE (op);
4098 if (mode != VOIDmode && GET_MODE (op) != mode)
4099 return 0;
4100 if (!COMPARISON_P (op))
4101 return 0;
4102 inmode = GET_MODE (XEXP (op, 0));
4104 if (inmode == CCFPmode || inmode == CCFPUmode)
4106 enum rtx_code second_code, bypass_code;
4107 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4108 return (bypass_code == NIL && second_code == NIL);
4110 switch (code)
4112 case EQ: case NE:
4113 return 1;
4114 case LT: case GE:
4115 if (inmode == CCmode || inmode == CCGCmode
4116 || inmode == CCGOCmode || inmode == CCNOmode)
4117 return 1;
4118 return 0;
4119 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4120 if (inmode == CCmode)
4121 return 1;
4122 return 0;
4123 case GT: case LE:
4124 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4125 return 1;
4126 return 0;
4127 default:
4128 return 0;
4132 /* Return 1 if OP is a valid comparison operator testing carry flag
4133 to be set. */
4135 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4137 enum machine_mode inmode;
4138 enum rtx_code code = GET_CODE (op);
4140 if (mode != VOIDmode && GET_MODE (op) != mode)
4141 return 0;
4142 if (!COMPARISON_P (op))
4143 return 0;
4144 inmode = GET_MODE (XEXP (op, 0));
4145 if (GET_CODE (XEXP (op, 0)) != REG
4146 || REGNO (XEXP (op, 0)) != 17
4147 || XEXP (op, 1) != const0_rtx)
4148 return 0;
4150 if (inmode == CCFPmode || inmode == CCFPUmode)
4152 enum rtx_code second_code, bypass_code;
4154 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4155 if (bypass_code != NIL || second_code != NIL)
4156 return 0;
4157 code = ix86_fp_compare_code_to_integer (code);
4159 else if (inmode != CCmode)
4160 return 0;
4161 return code == LTU;
4164 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4167 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4169 enum machine_mode inmode;
4170 enum rtx_code code = GET_CODE (op);
4172 if (mode != VOIDmode && GET_MODE (op) != mode)
4173 return 0;
4174 if (!COMPARISON_P (op))
4175 return 0;
4176 inmode = GET_MODE (XEXP (op, 0));
4177 if (inmode == CCFPmode || inmode == CCFPUmode)
4179 enum rtx_code second_code, bypass_code;
4181 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4182 if (bypass_code != NIL || second_code != NIL)
4183 return 0;
4184 code = ix86_fp_compare_code_to_integer (code);
4186 /* i387 supports just limited amount of conditional codes. */
4187 switch (code)
4189 case LTU: case GTU: case LEU: case GEU:
4190 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4191 return 1;
4192 return 0;
4193 case ORDERED: case UNORDERED:
4194 case EQ: case NE:
4195 return 1;
4196 default:
4197 return 0;
4201 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4204 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4206 switch (GET_CODE (op))
4208 case MULT:
4209 /* Modern CPUs have same latency for HImode and SImode multiply,
4210 but 386 and 486 do HImode multiply faster. */
4211 return ix86_tune > PROCESSOR_I486;
4212 case PLUS:
4213 case AND:
4214 case IOR:
4215 case XOR:
4216 case ASHIFT:
4217 return 1;
4218 default:
4219 return 0;
4223 /* Nearly general operand, but accept any const_double, since we wish
4224 to be able to drop them into memory rather than have them get pulled
4225 into registers. */
4228 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4230 if (mode != VOIDmode && mode != GET_MODE (op))
4231 return 0;
4232 if (GET_CODE (op) == CONST_DOUBLE)
4233 return 1;
4234 return general_operand (op, mode);
4237 /* Match an SI or HImode register for a zero_extract. */
4240 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4242 int regno;
4243 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4244 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4245 return 0;
4247 if (!register_operand (op, VOIDmode))
4248 return 0;
4250 /* Be careful to accept only registers having upper parts. */
4251 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4252 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4255 /* Return 1 if this is a valid binary floating-point operation.
4256 OP is the expression matched, and MODE is its mode. */
4259 binary_fp_operator (rtx op, enum machine_mode mode)
4261 if (mode != VOIDmode && mode != GET_MODE (op))
4262 return 0;
4264 switch (GET_CODE (op))
4266 case PLUS:
4267 case MINUS:
4268 case MULT:
4269 case DIV:
4270 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4272 default:
4273 return 0;
4278 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4280 return GET_CODE (op) == MULT;
4284 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4286 return GET_CODE (op) == DIV;
4290 arith_or_logical_operator (rtx op, enum machine_mode mode)
4292 return ((mode == VOIDmode || GET_MODE (op) == mode)
4293 && ARITHMETIC_P (op));
4296 /* Returns 1 if OP is memory operand with a displacement. */
4299 memory_displacement_operand (rtx op, enum machine_mode mode)
4301 struct ix86_address parts;
4303 if (! memory_operand (op, mode))
4304 return 0;
4306 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4307 abort ();
4309 return parts.disp != NULL_RTX;
4312 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4313 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4315 ??? It seems likely that this will only work because cmpsi is an
4316 expander, and no actual insns use this. */
4319 cmpsi_operand (rtx op, enum machine_mode mode)
4321 if (nonimmediate_operand (op, mode))
4322 return 1;
4324 if (GET_CODE (op) == AND
4325 && GET_MODE (op) == SImode
4326 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4327 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4328 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4329 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4330 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4331 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4332 return 1;
4334 return 0;
4337 /* Returns 1 if OP is memory operand that can not be represented by the
4338 modRM array. */
4341 long_memory_operand (rtx op, enum machine_mode mode)
4343 if (! memory_operand (op, mode))
4344 return 0;
4346 return memory_address_length (op) != 0;
4349 /* Return nonzero if the rtx is known aligned. */
4352 aligned_operand (rtx op, enum machine_mode mode)
4354 struct ix86_address parts;
4356 if (!general_operand (op, mode))
4357 return 0;
4359 /* Registers and immediate operands are always "aligned". */
4360 if (GET_CODE (op) != MEM)
4361 return 1;
4363 /* Don't even try to do any aligned optimizations with volatiles. */
4364 if (MEM_VOLATILE_P (op))
4365 return 0;
4367 op = XEXP (op, 0);
4369 /* Pushes and pops are only valid on the stack pointer. */
4370 if (GET_CODE (op) == PRE_DEC
4371 || GET_CODE (op) == POST_INC)
4372 return 1;
4374 /* Decode the address. */
4375 if (! ix86_decompose_address (op, &parts))
4376 abort ();
4378 /* Look for some component that isn't known to be aligned. */
4379 if (parts.index)
4381 if (parts.scale < 4
4382 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4383 return 0;
4385 if (parts.base)
4387 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4388 return 0;
4390 if (parts.disp)
4392 if (GET_CODE (parts.disp) != CONST_INT
4393 || (INTVAL (parts.disp) & 3) != 0)
4394 return 0;
4397 /* Didn't find one -- this must be an aligned address. */
4398 return 1;
4401 /* Initialize the table of extra 80387 mathematical constants. */
4403 static void
4404 init_ext_80387_constants (void)
4406 static const char * cst[5] =
4408 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4409 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4410 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4411 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4412 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4414 int i;
4416 for (i = 0; i < 5; i++)
4418 real_from_string (&ext_80387_constants_table[i], cst[i]);
4419 /* Ensure each constant is rounded to XFmode precision. */
4420 real_convert (&ext_80387_constants_table[i],
4421 XFmode, &ext_80387_constants_table[i]);
4424 ext_80387_constants_init = 1;
4427 /* Return true if the constant is something that can be loaded with
4428 a special instruction. */
4431 standard_80387_constant_p (rtx x)
4433 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4434 return -1;
4436 if (x == CONST0_RTX (GET_MODE (x)))
4437 return 1;
4438 if (x == CONST1_RTX (GET_MODE (x)))
4439 return 2;
4441 /* For XFmode constants, try to find a special 80387 instruction when
4442 optimizing for size or on those CPUs that benefit from them. */
4443 if (GET_MODE (x) == XFmode
4444 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4446 REAL_VALUE_TYPE r;
4447 int i;
4449 if (! ext_80387_constants_init)
4450 init_ext_80387_constants ();
4452 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4453 for (i = 0; i < 5; i++)
4454 if (real_identical (&r, &ext_80387_constants_table[i]))
4455 return i + 3;
4458 return 0;
4461 /* Return the opcode of the special instruction to be used to load
4462 the constant X. */
4464 const char *
4465 standard_80387_constant_opcode (rtx x)
4467 switch (standard_80387_constant_p (x))
4469 case 1:
4470 return "fldz";
4471 case 2:
4472 return "fld1";
4473 case 3:
4474 return "fldlg2";
4475 case 4:
4476 return "fldln2";
4477 case 5:
4478 return "fldl2e";
4479 case 6:
4480 return "fldl2t";
4481 case 7:
4482 return "fldpi";
4484 abort ();
4487 /* Return the CONST_DOUBLE representing the 80387 constant that is
4488 loaded by the specified special instruction. The argument IDX
4489 matches the return value from standard_80387_constant_p. */
4492 standard_80387_constant_rtx (int idx)
4494 int i;
4496 if (! ext_80387_constants_init)
4497 init_ext_80387_constants ();
4499 switch (idx)
4501 case 3:
4502 case 4:
4503 case 5:
4504 case 6:
4505 case 7:
4506 i = idx - 3;
4507 break;
4509 default:
4510 abort ();
4513 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4514 XFmode);
4517 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4520 standard_sse_constant_p (rtx x)
4522 if (x == const0_rtx)
4523 return 1;
4524 return (x == CONST0_RTX (GET_MODE (x)));
4527 /* Returns 1 if OP contains a symbol reference */
4530 symbolic_reference_mentioned_p (rtx op)
4532 const char *fmt;
4533 int i;
4535 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4536 return 1;
4538 fmt = GET_RTX_FORMAT (GET_CODE (op));
4539 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4541 if (fmt[i] == 'E')
4543 int j;
4545 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4546 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4547 return 1;
4550 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4551 return 1;
4554 return 0;
4557 /* Return 1 if it is appropriate to emit `ret' instructions in the
4558 body of a function. Do this only if the epilogue is simple, needing a
4559 couple of insns. Prior to reloading, we can't tell how many registers
4560 must be saved, so return 0 then. Return 0 if there is no frame
4561 marker to de-allocate.
4563 If NON_SAVING_SETJMP is defined and true, then it is not possible
4564 for the epilogue to be simple, so return 0. This is a special case
4565 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4566 until final, but jump_optimize may need to know sooner if a
4567 `return' is OK. */
4570 ix86_can_use_return_insn_p (void)
4572 struct ix86_frame frame;
4574 #ifdef NON_SAVING_SETJMP
4575 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4576 return 0;
4577 #endif
4579 if (! reload_completed || frame_pointer_needed)
4580 return 0;
4582 /* Don't allow more than 32 pop, since that's all we can do
4583 with one instruction. */
4584 if (current_function_pops_args
4585 && current_function_args_size >= 32768)
4586 return 0;
4588 ix86_compute_frame_layout (&frame);
4589 return frame.to_allocate == 0 && frame.nregs == 0;
4592 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4594 x86_64_sign_extended_value (rtx value)
4596 switch (GET_CODE (value))
4598 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4599 to be at least 32 and this all acceptable constants are
4600 represented as CONST_INT. */
4601 case CONST_INT:
4602 if (HOST_BITS_PER_WIDE_INT == 32)
4603 return 1;
4604 else
4606 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4607 return trunc_int_for_mode (val, SImode) == val;
4609 break;
4611 /* For certain code models, the symbolic references are known to fit.
4612 in CM_SMALL_PIC model we know it fits if it is local to the shared
4613 library. Don't count TLS SYMBOL_REFs here, since they should fit
4614 only if inside of UNSPEC handled below. */
4615 case SYMBOL_REF:
4616 /* TLS symbols are not constant. */
4617 if (tls_symbolic_operand (value, Pmode))
4618 return false;
4619 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4621 /* For certain code models, the code is near as well. */
4622 case LABEL_REF:
4623 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4624 || ix86_cmodel == CM_KERNEL);
4626 /* We also may accept the offsetted memory references in certain special
4627 cases. */
4628 case CONST:
4629 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4630 switch (XINT (XEXP (value, 0), 1))
4632 case UNSPEC_GOTPCREL:
4633 case UNSPEC_DTPOFF:
4634 case UNSPEC_GOTNTPOFF:
4635 case UNSPEC_NTPOFF:
4636 return 1;
4637 default:
4638 break;
4640 if (GET_CODE (XEXP (value, 0)) == PLUS)
4642 rtx op1 = XEXP (XEXP (value, 0), 0);
4643 rtx op2 = XEXP (XEXP (value, 0), 1);
4644 HOST_WIDE_INT offset;
4646 if (ix86_cmodel == CM_LARGE)
4647 return 0;
4648 if (GET_CODE (op2) != CONST_INT)
4649 return 0;
4650 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4651 switch (GET_CODE (op1))
4653 case SYMBOL_REF:
4654 /* For CM_SMALL assume that latest object is 16MB before
4655 end of 31bits boundary. We may also accept pretty
4656 large negative constants knowing that all objects are
4657 in the positive half of address space. */
4658 if (ix86_cmodel == CM_SMALL
4659 && offset < 16*1024*1024
4660 && trunc_int_for_mode (offset, SImode) == offset)
4661 return 1;
4662 /* For CM_KERNEL we know that all object resist in the
4663 negative half of 32bits address space. We may not
4664 accept negative offsets, since they may be just off
4665 and we may accept pretty large positive ones. */
4666 if (ix86_cmodel == CM_KERNEL
4667 && offset > 0
4668 && trunc_int_for_mode (offset, SImode) == offset)
4669 return 1;
4670 break;
4671 case LABEL_REF:
4672 /* These conditions are similar to SYMBOL_REF ones, just the
4673 constraints for code models differ. */
4674 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4675 && offset < 16*1024*1024
4676 && trunc_int_for_mode (offset, SImode) == offset)
4677 return 1;
4678 if (ix86_cmodel == CM_KERNEL
4679 && offset > 0
4680 && trunc_int_for_mode (offset, SImode) == offset)
4681 return 1;
4682 break;
4683 case UNSPEC:
4684 switch (XINT (op1, 1))
4686 case UNSPEC_DTPOFF:
4687 case UNSPEC_NTPOFF:
4688 if (offset > 0
4689 && trunc_int_for_mode (offset, SImode) == offset)
4690 return 1;
4692 break;
4693 default:
4694 return 0;
4697 return 0;
4698 default:
4699 return 0;
4703 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4705 x86_64_zero_extended_value (rtx value)
4707 switch (GET_CODE (value))
4709 case CONST_DOUBLE:
4710 if (HOST_BITS_PER_WIDE_INT == 32)
4711 return (GET_MODE (value) == VOIDmode
4712 && !CONST_DOUBLE_HIGH (value));
4713 else
4714 return 0;
4715 case CONST_INT:
4716 if (HOST_BITS_PER_WIDE_INT == 32)
4717 return INTVAL (value) >= 0;
4718 else
4719 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4720 break;
4722 /* For certain code models, the symbolic references are known to fit. */
4723 case SYMBOL_REF:
4724 /* TLS symbols are not constant. */
4725 if (tls_symbolic_operand (value, Pmode))
4726 return false;
4727 return ix86_cmodel == CM_SMALL;
4729 /* For certain code models, the code is near as well. */
4730 case LABEL_REF:
4731 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4733 /* We also may accept the offsetted memory references in certain special
4734 cases. */
4735 case CONST:
4736 if (GET_CODE (XEXP (value, 0)) == PLUS)
4738 rtx op1 = XEXP (XEXP (value, 0), 0);
4739 rtx op2 = XEXP (XEXP (value, 0), 1);
4741 if (ix86_cmodel == CM_LARGE)
4742 return 0;
4743 switch (GET_CODE (op1))
4745 case SYMBOL_REF:
4746 return 0;
4747 /* For small code model we may accept pretty large positive
4748 offsets, since one bit is available for free. Negative
4749 offsets are limited by the size of NULL pointer area
4750 specified by the ABI. */
4751 if (ix86_cmodel == CM_SMALL
4752 && GET_CODE (op2) == CONST_INT
4753 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4754 && (trunc_int_for_mode (INTVAL (op2), SImode)
4755 == INTVAL (op2)))
4756 return 1;
4757 /* ??? For the kernel, we may accept adjustment of
4758 -0x10000000, since we know that it will just convert
4759 negative address space to positive, but perhaps this
4760 is not worthwhile. */
4761 break;
4762 case LABEL_REF:
4763 /* These conditions are similar to SYMBOL_REF ones, just the
4764 constraints for code models differ. */
4765 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4766 && GET_CODE (op2) == CONST_INT
4767 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4768 && (trunc_int_for_mode (INTVAL (op2), SImode)
4769 == INTVAL (op2)))
4770 return 1;
4771 break;
4772 default:
4773 return 0;
4776 return 0;
4777 default:
4778 return 0;
4782 /* Value should be nonzero if functions must have frame pointers.
4783 Zero means the frame pointer need not be set up (and parms may
4784 be accessed via the stack pointer) in functions that seem suitable. */
4787 ix86_frame_pointer_required (void)
4789 /* If we accessed previous frames, then the generated code expects
4790 to be able to access the saved ebp value in our frame. */
4791 if (cfun->machine->accesses_prev_frame)
4792 return 1;
4794 /* Several x86 os'es need a frame pointer for other reasons,
4795 usually pertaining to setjmp. */
4796 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4797 return 1;
4799 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4800 the frame pointer by default. Turn it back on now if we've not
4801 got a leaf function. */
4802 if (TARGET_OMIT_LEAF_FRAME_POINTER
4803 && (!current_function_is_leaf))
4804 return 1;
4806 if (current_function_profile)
4807 return 1;
4809 return 0;
4812 /* Record that the current function accesses previous call frames. */
4814 void
4815 ix86_setup_frame_addresses (void)
4817 cfun->machine->accesses_prev_frame = 1;
4820 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4821 # define USE_HIDDEN_LINKONCE 1
4822 #else
4823 # define USE_HIDDEN_LINKONCE 0
4824 #endif
4826 static int pic_labels_used;
4828 /* Fills in the label name that should be used for a pc thunk for
4829 the given register. */
4831 static void
4832 get_pc_thunk_name (char name[32], unsigned int regno)
4834 if (USE_HIDDEN_LINKONCE)
4835 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4836 else
4837 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4841 /* This function generates code for -fpic that loads %ebx with
4842 the return address of the caller and then returns. */
4844 void
4845 ix86_file_end (void)
4847 rtx xops[2];
4848 int regno;
4850 for (regno = 0; regno < 8; ++regno)
4852 char name[32];
4854 if (! ((pic_labels_used >> regno) & 1))
4855 continue;
4857 get_pc_thunk_name (name, regno);
4859 if (USE_HIDDEN_LINKONCE)
4861 tree decl;
4863 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4864 error_mark_node);
4865 TREE_PUBLIC (decl) = 1;
4866 TREE_STATIC (decl) = 1;
4867 DECL_ONE_ONLY (decl) = 1;
4869 (*targetm.asm_out.unique_section) (decl, 0);
4870 named_section (decl, NULL, 0);
4872 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4873 fputs ("\t.hidden\t", asm_out_file);
4874 assemble_name (asm_out_file, name);
4875 fputc ('\n', asm_out_file);
4876 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4878 else
4880 text_section ();
4881 ASM_OUTPUT_LABEL (asm_out_file, name);
4884 xops[0] = gen_rtx_REG (SImode, regno);
4885 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4886 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4887 output_asm_insn ("ret", xops);
4890 if (NEED_INDICATE_EXEC_STACK)
4891 file_end_indicate_exec_stack ();
4894 /* Emit code for the SET_GOT patterns. */
4896 const char *
4897 output_set_got (rtx dest)
4899 rtx xops[3];
4901 xops[0] = dest;
4902 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4904 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4906 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4908 if (!flag_pic)
4909 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4910 else
4911 output_asm_insn ("call\t%a2", xops);
4913 #if TARGET_MACHO
4914 /* Output the "canonical" label name ("Lxx$pb") here too. This
4915 is what will be referred to by the Mach-O PIC subsystem. */
4916 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4917 #endif
4918 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4919 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4921 if (flag_pic)
4922 output_asm_insn ("pop{l}\t%0", xops);
4924 else
4926 char name[32];
4927 get_pc_thunk_name (name, REGNO (dest));
4928 pic_labels_used |= 1 << REGNO (dest);
4930 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4931 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4932 output_asm_insn ("call\t%X2", xops);
4935 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4936 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4937 else if (!TARGET_MACHO)
4938 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4940 return "";
4943 /* Generate an "push" pattern for input ARG. */
4945 static rtx
4946 gen_push (rtx arg)
4948 return gen_rtx_SET (VOIDmode,
4949 gen_rtx_MEM (Pmode,
4950 gen_rtx_PRE_DEC (Pmode,
4951 stack_pointer_rtx)),
4952 arg);
4955 /* Return >= 0 if there is an unused call-clobbered register available
4956 for the entire function. */
4958 static unsigned int
4959 ix86_select_alt_pic_regnum (void)
4961 if (current_function_is_leaf && !current_function_profile)
4963 int i;
4964 for (i = 2; i >= 0; --i)
4965 if (!regs_ever_live[i])
4966 return i;
4969 return INVALID_REGNUM;
4972 /* Return 1 if we need to save REGNO. */
4973 static int
4974 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4976 if (pic_offset_table_rtx
4977 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4978 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4979 || current_function_profile
4980 || current_function_calls_eh_return
4981 || current_function_uses_const_pool))
4983 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4984 return 0;
4985 return 1;
4988 if (current_function_calls_eh_return && maybe_eh_return)
4990 unsigned i;
4991 for (i = 0; ; i++)
4993 unsigned test = EH_RETURN_DATA_REGNO (i);
4994 if (test == INVALID_REGNUM)
4995 break;
4996 if (test == regno)
4997 return 1;
5001 return (regs_ever_live[regno]
5002 && !call_used_regs[regno]
5003 && !fixed_regs[regno]
5004 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5007 /* Return number of registers to be saved on the stack. */
5009 static int
5010 ix86_nsaved_regs (void)
5012 int nregs = 0;
5013 int regno;
5015 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5016 if (ix86_save_reg (regno, true))
5017 nregs++;
5018 return nregs;
5021 /* Return the offset between two registers, one to be eliminated, and the other
5022 its replacement, at the start of a routine. */
5024 HOST_WIDE_INT
5025 ix86_initial_elimination_offset (int from, int to)
5027 struct ix86_frame frame;
5028 ix86_compute_frame_layout (&frame);
5030 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5031 return frame.hard_frame_pointer_offset;
5032 else if (from == FRAME_POINTER_REGNUM
5033 && to == HARD_FRAME_POINTER_REGNUM)
5034 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5035 else
5037 if (to != STACK_POINTER_REGNUM)
5038 abort ();
5039 else if (from == ARG_POINTER_REGNUM)
5040 return frame.stack_pointer_offset;
5041 else if (from != FRAME_POINTER_REGNUM)
5042 abort ();
5043 else
5044 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5048 /* Fill structure ix86_frame about frame of currently computed function. */
5050 static void
5051 ix86_compute_frame_layout (struct ix86_frame *frame)
5053 HOST_WIDE_INT total_size;
5054 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5055 HOST_WIDE_INT offset;
5056 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5057 HOST_WIDE_INT size = get_frame_size ();
5059 frame->nregs = ix86_nsaved_regs ();
5060 total_size = size;
5062 /* During reload iteration the amount of registers saved can change.
5063 Recompute the value as needed. Do not recompute when amount of registers
5064 didn't change as reload does mutiple calls to the function and does not
5065 expect the decision to change within single iteration. */
5066 if (!optimize_size
5067 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5069 int count = frame->nregs;
5071 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5072 /* The fast prologue uses move instead of push to save registers. This
5073 is significantly longer, but also executes faster as modern hardware
5074 can execute the moves in parallel, but can't do that for push/pop.
5076 Be careful about choosing what prologue to emit: When function takes
5077 many instructions to execute we may use slow version as well as in
5078 case function is known to be outside hot spot (this is known with
5079 feedback only). Weight the size of function by number of registers
5080 to save as it is cheap to use one or two push instructions but very
5081 slow to use many of them. */
5082 if (count)
5083 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5084 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5085 || (flag_branch_probabilities
5086 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5087 cfun->machine->use_fast_prologue_epilogue = false;
5088 else
5089 cfun->machine->use_fast_prologue_epilogue
5090 = !expensive_function_p (count);
5092 if (TARGET_PROLOGUE_USING_MOVE
5093 && cfun->machine->use_fast_prologue_epilogue)
5094 frame->save_regs_using_mov = true;
5095 else
5096 frame->save_regs_using_mov = false;
5099 /* Skip return address and saved base pointer. */
5100 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5102 frame->hard_frame_pointer_offset = offset;
5104 /* Do some sanity checking of stack_alignment_needed and
5105 preferred_alignment, since i386 port is the only using those features
5106 that may break easily. */
5108 if (size && !stack_alignment_needed)
5109 abort ();
5110 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5111 abort ();
5112 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5113 abort ();
5114 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5115 abort ();
5117 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5118 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5120 /* Register save area */
5121 offset += frame->nregs * UNITS_PER_WORD;
5123 /* Va-arg area */
5124 if (ix86_save_varrargs_registers)
5126 offset += X86_64_VARARGS_SIZE;
5127 frame->va_arg_size = X86_64_VARARGS_SIZE;
5129 else
5130 frame->va_arg_size = 0;
5132 /* Align start of frame for local function. */
5133 frame->padding1 = ((offset + stack_alignment_needed - 1)
5134 & -stack_alignment_needed) - offset;
5136 offset += frame->padding1;
5138 /* Frame pointer points here. */
5139 frame->frame_pointer_offset = offset;
5141 offset += size;
5143 /* Add outgoing arguments area. Can be skipped if we eliminated
5144 all the function calls as dead code.
5145 Skipping is however impossible when function calls alloca. Alloca
5146 expander assumes that last current_function_outgoing_args_size
5147 of stack frame are unused. */
5148 if (ACCUMULATE_OUTGOING_ARGS
5149 && (!current_function_is_leaf || current_function_calls_alloca))
5151 offset += current_function_outgoing_args_size;
5152 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5154 else
5155 frame->outgoing_arguments_size = 0;
5157 /* Align stack boundary. Only needed if we're calling another function
5158 or using alloca. */
5159 if (!current_function_is_leaf || current_function_calls_alloca)
5160 frame->padding2 = ((offset + preferred_alignment - 1)
5161 & -preferred_alignment) - offset;
5162 else
5163 frame->padding2 = 0;
5165 offset += frame->padding2;
5167 /* We've reached end of stack frame. */
5168 frame->stack_pointer_offset = offset;
5170 /* Size prologue needs to allocate. */
5171 frame->to_allocate =
5172 (size + frame->padding1 + frame->padding2
5173 + frame->outgoing_arguments_size + frame->va_arg_size);
5175 if ((!frame->to_allocate && frame->nregs <= 1)
5176 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5177 frame->save_regs_using_mov = false;
5179 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5180 && current_function_is_leaf)
5182 frame->red_zone_size = frame->to_allocate;
5183 if (frame->save_regs_using_mov)
5184 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5185 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5186 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5188 else
5189 frame->red_zone_size = 0;
5190 frame->to_allocate -= frame->red_zone_size;
5191 frame->stack_pointer_offset -= frame->red_zone_size;
5192 #if 0
5193 fprintf (stderr, "nregs: %i\n", frame->nregs);
5194 fprintf (stderr, "size: %i\n", size);
5195 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5196 fprintf (stderr, "padding1: %i\n", frame->padding1);
5197 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5198 fprintf (stderr, "padding2: %i\n", frame->padding2);
5199 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5200 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5201 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5202 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5203 frame->hard_frame_pointer_offset);
5204 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5205 #endif
5208 /* Emit code to save registers in the prologue. */
5210 static void
5211 ix86_emit_save_regs (void)
5213 int regno;
5214 rtx insn;
5216 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5217 if (ix86_save_reg (regno, true))
5219 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5220 RTX_FRAME_RELATED_P (insn) = 1;
5224 /* Emit code to save registers using MOV insns. First register
5225 is restored from POINTER + OFFSET. */
5226 static void
5227 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5229 int regno;
5230 rtx insn;
5232 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5233 if (ix86_save_reg (regno, true))
5235 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5236 Pmode, offset),
5237 gen_rtx_REG (Pmode, regno));
5238 RTX_FRAME_RELATED_P (insn) = 1;
5239 offset += UNITS_PER_WORD;
5243 /* Expand prologue or epilogue stack adjustment.
5244 The pattern exist to put a dependency on all ebp-based memory accesses.
5245 STYLE should be negative if instructions should be marked as frame related,
5246 zero if %r11 register is live and cannot be freely used and positive
5247 otherwise. */
5249 static void
5250 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5252 rtx insn;
5254 if (! TARGET_64BIT)
5255 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5256 else if (x86_64_immediate_operand (offset, DImode))
5257 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5258 else
5260 rtx r11;
5261 /* r11 is used by indirect sibcall return as well, set before the
5262 epilogue and used after the epilogue. ATM indirect sibcall
5263 shouldn't be used together with huge frame sizes in one
5264 function because of the frame_size check in sibcall.c. */
5265 if (style == 0)
5266 abort ();
5267 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5268 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5269 if (style < 0)
5270 RTX_FRAME_RELATED_P (insn) = 1;
5271 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5272 offset));
5274 if (style < 0)
5275 RTX_FRAME_RELATED_P (insn) = 1;
5278 /* Expand the prologue into a bunch of separate insns. */
5280 void
5281 ix86_expand_prologue (void)
5283 rtx insn;
5284 bool pic_reg_used;
5285 struct ix86_frame frame;
5286 HOST_WIDE_INT allocate;
5288 ix86_compute_frame_layout (&frame);
5290 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5291 slower on all targets. Also sdb doesn't like it. */
5293 if (frame_pointer_needed)
5295 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5296 RTX_FRAME_RELATED_P (insn) = 1;
5298 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5299 RTX_FRAME_RELATED_P (insn) = 1;
5302 allocate = frame.to_allocate;
5304 if (!frame.save_regs_using_mov)
5305 ix86_emit_save_regs ();
5306 else
5307 allocate += frame.nregs * UNITS_PER_WORD;
5309 /* When using red zone we may start register saving before allocating
5310 the stack frame saving one cycle of the prologue. */
5311 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5312 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5313 : stack_pointer_rtx,
5314 -frame.nregs * UNITS_PER_WORD);
5316 if (allocate == 0)
5318 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5319 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5320 GEN_INT (-allocate), -1);
5321 else
5323 /* Only valid for Win32. */
5324 rtx eax = gen_rtx_REG (SImode, 0);
5325 bool eax_live = ix86_eax_live_at_start_p ();
5327 if (TARGET_64BIT)
5328 abort ();
5330 if (eax_live)
5332 emit_insn (gen_push (eax));
5333 allocate -= 4;
5336 insn = emit_move_insn (eax, GEN_INT (allocate));
5337 RTX_FRAME_RELATED_P (insn) = 1;
5339 insn = emit_insn (gen_allocate_stack_worker (eax));
5340 RTX_FRAME_RELATED_P (insn) = 1;
5342 if (eax_live)
5344 rtx t = plus_constant (stack_pointer_rtx, allocate);
5345 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5349 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5351 if (!frame_pointer_needed || !frame.to_allocate)
5352 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5353 else
5354 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5355 -frame.nregs * UNITS_PER_WORD);
5358 pic_reg_used = false;
5359 if (pic_offset_table_rtx
5360 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5361 || current_function_profile))
5363 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5365 if (alt_pic_reg_used != INVALID_REGNUM)
5366 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5368 pic_reg_used = true;
5371 if (pic_reg_used)
5373 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5375 /* Even with accurate pre-reload life analysis, we can wind up
5376 deleting all references to the pic register after reload.
5377 Consider if cross-jumping unifies two sides of a branch
5378 controlled by a comparison vs the only read from a global.
5379 In which case, allow the set_got to be deleted, though we're
5380 too late to do anything about the ebx save in the prologue. */
5381 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5384 /* Prevent function calls from be scheduled before the call to mcount.
5385 In the pic_reg_used case, make sure that the got load isn't deleted. */
5386 if (current_function_profile)
5387 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5390 /* Emit code to restore saved registers using MOV insns. First register
5391 is restored from POINTER + OFFSET. */
5392 static void
5393 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5394 int maybe_eh_return)
5396 int regno;
5397 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5399 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5400 if (ix86_save_reg (regno, maybe_eh_return))
5402 /* Ensure that adjust_address won't be forced to produce pointer
5403 out of range allowed by x86-64 instruction set. */
5404 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5406 rtx r11;
5408 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5409 emit_move_insn (r11, GEN_INT (offset));
5410 emit_insn (gen_adddi3 (r11, r11, pointer));
5411 base_address = gen_rtx_MEM (Pmode, r11);
5412 offset = 0;
5414 emit_move_insn (gen_rtx_REG (Pmode, regno),
5415 adjust_address (base_address, Pmode, offset));
5416 offset += UNITS_PER_WORD;
5420 /* Restore function stack, frame, and registers. */
5422 void
5423 ix86_expand_epilogue (int style)
5425 int regno;
5426 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5427 struct ix86_frame frame;
5428 HOST_WIDE_INT offset;
5430 ix86_compute_frame_layout (&frame);
5432 /* Calculate start of saved registers relative to ebp. Special care
5433 must be taken for the normal return case of a function using
5434 eh_return: the eax and edx registers are marked as saved, but not
5435 restored along this path. */
5436 offset = frame.nregs;
5437 if (current_function_calls_eh_return && style != 2)
5438 offset -= 2;
5439 offset *= -UNITS_PER_WORD;
5441 /* If we're only restoring one register and sp is not valid then
5442 using a move instruction to restore the register since it's
5443 less work than reloading sp and popping the register.
5445 The default code result in stack adjustment using add/lea instruction,
5446 while this code results in LEAVE instruction (or discrete equivalent),
5447 so it is profitable in some other cases as well. Especially when there
5448 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5449 and there is exactly one register to pop. This heuristic may need some
5450 tuning in future. */
5451 if ((!sp_valid && frame.nregs <= 1)
5452 || (TARGET_EPILOGUE_USING_MOVE
5453 && cfun->machine->use_fast_prologue_epilogue
5454 && (frame.nregs > 1 || frame.to_allocate))
5455 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5456 || (frame_pointer_needed && TARGET_USE_LEAVE
5457 && cfun->machine->use_fast_prologue_epilogue
5458 && frame.nregs == 1)
5459 || current_function_calls_eh_return)
5461 /* Restore registers. We can use ebp or esp to address the memory
5462 locations. If both are available, default to ebp, since offsets
5463 are known to be small. Only exception is esp pointing directly to the
5464 end of block of saved registers, where we may simplify addressing
5465 mode. */
5467 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5468 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5469 frame.to_allocate, style == 2);
5470 else
5471 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5472 offset, style == 2);
5474 /* eh_return epilogues need %ecx added to the stack pointer. */
5475 if (style == 2)
5477 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5479 if (frame_pointer_needed)
5481 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5482 tmp = plus_constant (tmp, UNITS_PER_WORD);
5483 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5485 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5486 emit_move_insn (hard_frame_pointer_rtx, tmp);
5488 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5489 const0_rtx, style);
5491 else
5493 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5494 tmp = plus_constant (tmp, (frame.to_allocate
5495 + frame.nregs * UNITS_PER_WORD));
5496 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5499 else if (!frame_pointer_needed)
5500 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5501 GEN_INT (frame.to_allocate
5502 + frame.nregs * UNITS_PER_WORD),
5503 style);
5504 /* If not an i386, mov & pop is faster than "leave". */
5505 else if (TARGET_USE_LEAVE || optimize_size
5506 || !cfun->machine->use_fast_prologue_epilogue)
5507 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5508 else
5510 pro_epilogue_adjust_stack (stack_pointer_rtx,
5511 hard_frame_pointer_rtx,
5512 const0_rtx, style);
5513 if (TARGET_64BIT)
5514 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5515 else
5516 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5519 else
5521 /* First step is to deallocate the stack frame so that we can
5522 pop the registers. */
5523 if (!sp_valid)
5525 if (!frame_pointer_needed)
5526 abort ();
5527 pro_epilogue_adjust_stack (stack_pointer_rtx,
5528 hard_frame_pointer_rtx,
5529 GEN_INT (offset), style);
5531 else if (frame.to_allocate)
5532 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5533 GEN_INT (frame.to_allocate), style);
5535 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5536 if (ix86_save_reg (regno, false))
5538 if (TARGET_64BIT)
5539 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5540 else
5541 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5543 if (frame_pointer_needed)
5545 /* Leave results in shorter dependency chains on CPUs that are
5546 able to grok it fast. */
5547 if (TARGET_USE_LEAVE)
5548 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5549 else if (TARGET_64BIT)
5550 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5551 else
5552 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5556 /* Sibcall epilogues don't want a return instruction. */
5557 if (style == 0)
5558 return;
5560 if (current_function_pops_args && current_function_args_size)
5562 rtx popc = GEN_INT (current_function_pops_args);
5564 /* i386 can only pop 64K bytes. If asked to pop more, pop
5565 return address, do explicit add, and jump indirectly to the
5566 caller. */
5568 if (current_function_pops_args >= 65536)
5570 rtx ecx = gen_rtx_REG (SImode, 2);
5572 /* There is no "pascal" calling convention in 64bit ABI. */
5573 if (TARGET_64BIT)
5574 abort ();
5576 emit_insn (gen_popsi1 (ecx));
5577 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5578 emit_jump_insn (gen_return_indirect_internal (ecx));
5580 else
5581 emit_jump_insn (gen_return_pop_internal (popc));
5583 else
5584 emit_jump_insn (gen_return_internal ());
5587 /* Reset from the function's potential modifications. */
5589 static void
5590 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5591 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5593 if (pic_offset_table_rtx)
5594 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5597 /* Extract the parts of an RTL expression that is a valid memory address
5598 for an instruction. Return 0 if the structure of the address is
5599 grossly off. Return -1 if the address contains ASHIFT, so it is not
5600 strictly valid, but still used for computing length of lea instruction. */
5602 static int
5603 ix86_decompose_address (rtx addr, struct ix86_address *out)
5605 rtx base = NULL_RTX;
5606 rtx index = NULL_RTX;
5607 rtx disp = NULL_RTX;
5608 HOST_WIDE_INT scale = 1;
5609 rtx scale_rtx = NULL_RTX;
5610 int retval = 1;
5611 enum ix86_address_seg seg = SEG_DEFAULT;
5613 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5614 base = addr;
5615 else if (GET_CODE (addr) == PLUS)
5617 rtx addends[4], op;
5618 int n = 0, i;
5620 op = addr;
5623 if (n >= 4)
5624 return 0;
5625 addends[n++] = XEXP (op, 1);
5626 op = XEXP (op, 0);
5628 while (GET_CODE (op) == PLUS);
5629 if (n >= 4)
5630 return 0;
5631 addends[n] = op;
5633 for (i = n; i >= 0; --i)
5635 op = addends[i];
5636 switch (GET_CODE (op))
5638 case MULT:
5639 if (index)
5640 return 0;
5641 index = XEXP (op, 0);
5642 scale_rtx = XEXP (op, 1);
5643 break;
5645 case UNSPEC:
5646 if (XINT (op, 1) == UNSPEC_TP
5647 && TARGET_TLS_DIRECT_SEG_REFS
5648 && seg == SEG_DEFAULT)
5649 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5650 else
5651 return 0;
5652 break;
5654 case REG:
5655 case SUBREG:
5656 if (!base)
5657 base = op;
5658 else if (!index)
5659 index = op;
5660 else
5661 return 0;
5662 break;
5664 case CONST:
5665 case CONST_INT:
5666 case SYMBOL_REF:
5667 case LABEL_REF:
5668 if (disp)
5669 return 0;
5670 disp = op;
5671 break;
5673 default:
5674 return 0;
5678 else if (GET_CODE (addr) == MULT)
5680 index = XEXP (addr, 0); /* index*scale */
5681 scale_rtx = XEXP (addr, 1);
5683 else if (GET_CODE (addr) == ASHIFT)
5685 rtx tmp;
5687 /* We're called for lea too, which implements ashift on occasion. */
5688 index = XEXP (addr, 0);
5689 tmp = XEXP (addr, 1);
5690 if (GET_CODE (tmp) != CONST_INT)
5691 return 0;
5692 scale = INTVAL (tmp);
5693 if ((unsigned HOST_WIDE_INT) scale > 3)
5694 return 0;
5695 scale = 1 << scale;
5696 retval = -1;
5698 else
5699 disp = addr; /* displacement */
5701 /* Extract the integral value of scale. */
5702 if (scale_rtx)
5704 if (GET_CODE (scale_rtx) != CONST_INT)
5705 return 0;
5706 scale = INTVAL (scale_rtx);
5709 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5710 if (base && index && scale == 1
5711 && (index == arg_pointer_rtx
5712 || index == frame_pointer_rtx
5713 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5715 rtx tmp = base;
5716 base = index;
5717 index = tmp;
5720 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5721 if ((base == hard_frame_pointer_rtx
5722 || base == frame_pointer_rtx
5723 || base == arg_pointer_rtx) && !disp)
5724 disp = const0_rtx;
5726 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5727 Avoid this by transforming to [%esi+0]. */
5728 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5729 && base && !index && !disp
5730 && REG_P (base)
5731 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5732 disp = const0_rtx;
5734 /* Special case: encode reg+reg instead of reg*2. */
5735 if (!base && index && scale && scale == 2)
5736 base = index, scale = 1;
5738 /* Special case: scaling cannot be encoded without base or displacement. */
5739 if (!base && !disp && index && scale != 1)
5740 disp = const0_rtx;
5742 out->base = base;
5743 out->index = index;
5744 out->disp = disp;
5745 out->scale = scale;
5746 out->seg = seg;
5748 return retval;
5751 /* Return cost of the memory address x.
5752 For i386, it is better to use a complex address than let gcc copy
5753 the address into a reg and make a new pseudo. But not if the address
5754 requires to two regs - that would mean more pseudos with longer
5755 lifetimes. */
5756 static int
5757 ix86_address_cost (rtx x)
5759 struct ix86_address parts;
5760 int cost = 1;
5762 if (!ix86_decompose_address (x, &parts))
5763 abort ();
5765 /* More complex memory references are better. */
5766 if (parts.disp && parts.disp != const0_rtx)
5767 cost--;
5768 if (parts.seg != SEG_DEFAULT)
5769 cost--;
5771 /* Attempt to minimize number of registers in the address. */
5772 if ((parts.base
5773 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5774 || (parts.index
5775 && (!REG_P (parts.index)
5776 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5777 cost++;
5779 if (parts.base
5780 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5781 && parts.index
5782 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5783 && parts.base != parts.index)
5784 cost++;
5786 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5787 since it's predecode logic can't detect the length of instructions
5788 and it degenerates to vector decoded. Increase cost of such
5789 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5790 to split such addresses or even refuse such addresses at all.
5792 Following addressing modes are affected:
5793 [base+scale*index]
5794 [scale*index+disp]
5795 [base+index]
5797 The first and last case may be avoidable by explicitly coding the zero in
5798 memory address, but I don't have AMD-K6 machine handy to check this
5799 theory. */
5801 if (TARGET_K6
5802 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5803 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5804 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5805 cost += 10;
5807 return cost;
5810 /* If X is a machine specific address (i.e. a symbol or label being
5811 referenced as a displacement from the GOT implemented using an
5812 UNSPEC), then return the base term. Otherwise return X. */
5815 ix86_find_base_term (rtx x)
5817 rtx term;
5819 if (TARGET_64BIT)
5821 if (GET_CODE (x) != CONST)
5822 return x;
5823 term = XEXP (x, 0);
5824 if (GET_CODE (term) == PLUS
5825 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5826 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5827 term = XEXP (term, 0);
5828 if (GET_CODE (term) != UNSPEC
5829 || XINT (term, 1) != UNSPEC_GOTPCREL)
5830 return x;
5832 term = XVECEXP (term, 0, 0);
5834 if (GET_CODE (term) != SYMBOL_REF
5835 && GET_CODE (term) != LABEL_REF)
5836 return x;
5838 return term;
5841 term = ix86_delegitimize_address (x);
5843 if (GET_CODE (term) != SYMBOL_REF
5844 && GET_CODE (term) != LABEL_REF)
5845 return x;
5847 return term;
5850 /* Determine if a given RTX is a valid constant. We already know this
5851 satisfies CONSTANT_P. */
5853 bool
5854 legitimate_constant_p (rtx x)
5856 rtx inner;
5858 switch (GET_CODE (x))
5860 case SYMBOL_REF:
5861 /* TLS symbols are not constant. */
5862 if (tls_symbolic_operand (x, Pmode))
5863 return false;
5864 break;
5866 case CONST:
5867 inner = XEXP (x, 0);
5869 /* Offsets of TLS symbols are never valid.
5870 Discourage CSE from creating them. */
5871 if (GET_CODE (inner) == PLUS
5872 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5873 return false;
5875 if (GET_CODE (inner) == PLUS
5876 || GET_CODE (inner) == MINUS)
5878 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5879 return false;
5880 inner = XEXP (inner, 0);
5883 /* Only some unspecs are valid as "constants". */
5884 if (GET_CODE (inner) == UNSPEC)
5885 switch (XINT (inner, 1))
5887 case UNSPEC_TPOFF:
5888 case UNSPEC_NTPOFF:
5889 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5890 case UNSPEC_DTPOFF:
5891 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5892 default:
5893 return false;
5895 break;
5897 default:
5898 break;
5901 /* Otherwise we handle everything else in the move patterns. */
5902 return true;
5905 /* Determine if it's legal to put X into the constant pool. This
5906 is not possible for the address of thread-local symbols, which
5907 is checked above. */
5909 static bool
5910 ix86_cannot_force_const_mem (rtx x)
5912 return !legitimate_constant_p (x);
5915 /* Determine if a given RTX is a valid constant address. */
5917 bool
5918 constant_address_p (rtx x)
5920 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5923 /* Nonzero if the constant value X is a legitimate general operand
5924 when generating PIC code. It is given that flag_pic is on and
5925 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5927 bool
5928 legitimate_pic_operand_p (rtx x)
5930 rtx inner;
5932 switch (GET_CODE (x))
5934 case CONST:
5935 inner = XEXP (x, 0);
5937 /* Only some unspecs are valid as "constants". */
5938 if (GET_CODE (inner) == UNSPEC)
5939 switch (XINT (inner, 1))
5941 case UNSPEC_TPOFF:
5942 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5943 default:
5944 return false;
5946 /* FALLTHRU */
5948 case SYMBOL_REF:
5949 case LABEL_REF:
5950 return legitimate_pic_address_disp_p (x);
5952 default:
5953 return true;
5957 /* Determine if a given CONST RTX is a valid memory displacement
5958 in PIC mode. */
5961 legitimate_pic_address_disp_p (rtx disp)
5963 bool saw_plus;
5965 /* In 64bit mode we can allow direct addresses of symbols and labels
5966 when they are not dynamic symbols. */
5967 if (TARGET_64BIT)
5969 /* TLS references should always be enclosed in UNSPEC. */
5970 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5971 return 0;
5972 if (GET_CODE (disp) == SYMBOL_REF
5973 && ix86_cmodel == CM_SMALL_PIC
5974 && SYMBOL_REF_LOCAL_P (disp))
5975 return 1;
5976 if (GET_CODE (disp) == LABEL_REF)
5977 return 1;
5978 if (GET_CODE (disp) == CONST
5979 && GET_CODE (XEXP (disp, 0)) == PLUS)
5981 rtx op0 = XEXP (XEXP (disp, 0), 0);
5982 rtx op1 = XEXP (XEXP (disp, 0), 1);
5984 /* TLS references should always be enclosed in UNSPEC. */
5985 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5986 return 0;
5987 if (((GET_CODE (op0) == SYMBOL_REF
5988 && ix86_cmodel == CM_SMALL_PIC
5989 && SYMBOL_REF_LOCAL_P (op0))
5990 || GET_CODE (op0) == LABEL_REF)
5991 && GET_CODE (op1) == CONST_INT
5992 && INTVAL (op1) < 16*1024*1024
5993 && INTVAL (op1) >= -16*1024*1024)
5994 return 1;
5997 if (GET_CODE (disp) != CONST)
5998 return 0;
5999 disp = XEXP (disp, 0);
6001 if (TARGET_64BIT)
6003 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6004 of GOT tables. We should not need these anyway. */
6005 if (GET_CODE (disp) != UNSPEC
6006 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6007 return 0;
6009 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6010 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6011 return 0;
6012 return 1;
6015 saw_plus = false;
6016 if (GET_CODE (disp) == PLUS)
6018 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6019 return 0;
6020 disp = XEXP (disp, 0);
6021 saw_plus = true;
6024 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6025 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6027 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6028 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6029 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6031 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6032 if (! strcmp (sym_name, "<pic base>"))
6033 return 1;
6037 if (GET_CODE (disp) != UNSPEC)
6038 return 0;
6040 switch (XINT (disp, 1))
6042 case UNSPEC_GOT:
6043 if (saw_plus)
6044 return false;
6045 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6046 case UNSPEC_GOTOFF:
6047 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6048 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6049 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6050 return false;
6051 case UNSPEC_GOTTPOFF:
6052 case UNSPEC_GOTNTPOFF:
6053 case UNSPEC_INDNTPOFF:
6054 if (saw_plus)
6055 return false;
6056 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6057 case UNSPEC_NTPOFF:
6058 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6059 case UNSPEC_DTPOFF:
6060 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6063 return 0;
6066 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6067 memory address for an instruction. The MODE argument is the machine mode
6068 for the MEM expression that wants to use this address.
6070 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6071 convert common non-canonical forms to canonical form so that they will
6072 be recognized. */
6075 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6077 struct ix86_address parts;
6078 rtx base, index, disp;
6079 HOST_WIDE_INT scale;
6080 const char *reason = NULL;
6081 rtx reason_rtx = NULL_RTX;
6083 if (TARGET_DEBUG_ADDR)
6085 fprintf (stderr,
6086 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6087 GET_MODE_NAME (mode), strict);
6088 debug_rtx (addr);
6091 if (ix86_decompose_address (addr, &parts) <= 0)
6093 reason = "decomposition failed";
6094 goto report_error;
6097 base = parts.base;
6098 index = parts.index;
6099 disp = parts.disp;
6100 scale = parts.scale;
6102 /* Validate base register.
6104 Don't allow SUBREG's here, it can lead to spill failures when the base
6105 is one word out of a two word structure, which is represented internally
6106 as a DImode int. */
6108 if (base)
6110 reason_rtx = base;
6112 if (GET_CODE (base) != REG)
6114 reason = "base is not a register";
6115 goto report_error;
6118 if (GET_MODE (base) != Pmode)
6120 reason = "base is not in Pmode";
6121 goto report_error;
6124 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6125 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6127 reason = "base is not valid";
6128 goto report_error;
6132 /* Validate index register.
6134 Don't allow SUBREG's here, it can lead to spill failures when the index
6135 is one word out of a two word structure, which is represented internally
6136 as a DImode int. */
6138 if (index)
6140 reason_rtx = index;
6142 if (GET_CODE (index) != REG)
6144 reason = "index is not a register";
6145 goto report_error;
6148 if (GET_MODE (index) != Pmode)
6150 reason = "index is not in Pmode";
6151 goto report_error;
6154 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6155 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6157 reason = "index is not valid";
6158 goto report_error;
6162 /* Validate scale factor. */
6163 if (scale != 1)
6165 reason_rtx = GEN_INT (scale);
6166 if (!index)
6168 reason = "scale without index";
6169 goto report_error;
6172 if (scale != 2 && scale != 4 && scale != 8)
6174 reason = "scale is not a valid multiplier";
6175 goto report_error;
6179 /* Validate displacement. */
6180 if (disp)
6182 reason_rtx = disp;
6184 if (GET_CODE (disp) == CONST
6185 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6186 switch (XINT (XEXP (disp, 0), 1))
6188 case UNSPEC_GOT:
6189 case UNSPEC_GOTOFF:
6190 case UNSPEC_GOTPCREL:
6191 if (!flag_pic)
6192 abort ();
6193 goto is_legitimate_pic;
6195 case UNSPEC_GOTTPOFF:
6196 case UNSPEC_GOTNTPOFF:
6197 case UNSPEC_INDNTPOFF:
6198 case UNSPEC_NTPOFF:
6199 case UNSPEC_DTPOFF:
6200 break;
6202 default:
6203 reason = "invalid address unspec";
6204 goto report_error;
6207 else if (flag_pic && (SYMBOLIC_CONST (disp)
6208 #if TARGET_MACHO
6209 && !machopic_operand_p (disp)
6210 #endif
6213 is_legitimate_pic:
6214 if (TARGET_64BIT && (index || base))
6216 /* foo@dtpoff(%rX) is ok. */
6217 if (GET_CODE (disp) != CONST
6218 || GET_CODE (XEXP (disp, 0)) != PLUS
6219 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6220 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6221 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6222 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6224 reason = "non-constant pic memory reference";
6225 goto report_error;
6228 else if (! legitimate_pic_address_disp_p (disp))
6230 reason = "displacement is an invalid pic construct";
6231 goto report_error;
6234 /* This code used to verify that a symbolic pic displacement
6235 includes the pic_offset_table_rtx register.
6237 While this is good idea, unfortunately these constructs may
6238 be created by "adds using lea" optimization for incorrect
6239 code like:
6241 int a;
6242 int foo(int i)
6244 return *(&a+i);
6247 This code is nonsensical, but results in addressing
6248 GOT table with pic_offset_table_rtx base. We can't
6249 just refuse it easily, since it gets matched by
6250 "addsi3" pattern, that later gets split to lea in the
6251 case output register differs from input. While this
6252 can be handled by separate addsi pattern for this case
6253 that never results in lea, this seems to be easier and
6254 correct fix for crash to disable this test. */
6256 else if (GET_CODE (disp) != LABEL_REF
6257 && GET_CODE (disp) != CONST_INT
6258 && (GET_CODE (disp) != CONST
6259 || !legitimate_constant_p (disp))
6260 && (GET_CODE (disp) != SYMBOL_REF
6261 || !legitimate_constant_p (disp)))
6263 reason = "displacement is not constant";
6264 goto report_error;
6266 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6268 reason = "displacement is out of range";
6269 goto report_error;
6273 /* Everything looks valid. */
6274 if (TARGET_DEBUG_ADDR)
6275 fprintf (stderr, "Success.\n");
6276 return TRUE;
6278 report_error:
6279 if (TARGET_DEBUG_ADDR)
6281 fprintf (stderr, "Error: %s\n", reason);
6282 debug_rtx (reason_rtx);
6284 return FALSE;
6287 /* Return an unique alias set for the GOT. */
6289 static HOST_WIDE_INT
6290 ix86_GOT_alias_set (void)
6292 static HOST_WIDE_INT set = -1;
6293 if (set == -1)
6294 set = new_alias_set ();
6295 return set;
6298 /* Return a legitimate reference for ORIG (an address) using the
6299 register REG. If REG is 0, a new pseudo is generated.
6301 There are two types of references that must be handled:
6303 1. Global data references must load the address from the GOT, via
6304 the PIC reg. An insn is emitted to do this load, and the reg is
6305 returned.
6307 2. Static data references, constant pool addresses, and code labels
6308 compute the address as an offset from the GOT, whose base is in
6309 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6310 differentiate them from global data objects. The returned
6311 address is the PIC reg + an unspec constant.
6313 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6314 reg also appears in the address. */
6316 static rtx
6317 legitimize_pic_address (rtx orig, rtx reg)
6319 rtx addr = orig;
6320 rtx new = orig;
6321 rtx base;
6323 #if TARGET_MACHO
6324 if (reg == 0)
6325 reg = gen_reg_rtx (Pmode);
6326 /* Use the generic Mach-O PIC machinery. */
6327 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6328 #endif
6330 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6331 new = addr;
6332 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6334 /* This symbol may be referenced via a displacement from the PIC
6335 base address (@GOTOFF). */
6337 if (reload_in_progress)
6338 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6339 if (GET_CODE (addr) == CONST)
6340 addr = XEXP (addr, 0);
6341 if (GET_CODE (addr) == PLUS)
6343 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6344 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6346 else
6347 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6348 new = gen_rtx_CONST (Pmode, new);
6349 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6351 if (reg != 0)
6353 emit_move_insn (reg, new);
6354 new = reg;
6357 else if (GET_CODE (addr) == SYMBOL_REF)
6359 if (TARGET_64BIT)
6361 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6362 new = gen_rtx_CONST (Pmode, new);
6363 new = gen_rtx_MEM (Pmode, new);
6364 RTX_UNCHANGING_P (new) = 1;
6365 set_mem_alias_set (new, ix86_GOT_alias_set ());
6367 if (reg == 0)
6368 reg = gen_reg_rtx (Pmode);
6369 /* Use directly gen_movsi, otherwise the address is loaded
6370 into register for CSE. We don't want to CSE this addresses,
6371 instead we CSE addresses from the GOT table, so skip this. */
6372 emit_insn (gen_movsi (reg, new));
6373 new = reg;
6375 else
6377 /* This symbol must be referenced via a load from the
6378 Global Offset Table (@GOT). */
6380 if (reload_in_progress)
6381 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6382 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6383 new = gen_rtx_CONST (Pmode, new);
6384 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6385 new = gen_rtx_MEM (Pmode, new);
6386 RTX_UNCHANGING_P (new) = 1;
6387 set_mem_alias_set (new, ix86_GOT_alias_set ());
6389 if (reg == 0)
6390 reg = gen_reg_rtx (Pmode);
6391 emit_move_insn (reg, new);
6392 new = reg;
6395 else
6397 if (GET_CODE (addr) == CONST)
6399 addr = XEXP (addr, 0);
6401 /* We must match stuff we generate before. Assume the only
6402 unspecs that can get here are ours. Not that we could do
6403 anything with them anyway.... */
6404 if (GET_CODE (addr) == UNSPEC
6405 || (GET_CODE (addr) == PLUS
6406 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6407 return orig;
6408 if (GET_CODE (addr) != PLUS)
6409 abort ();
6411 if (GET_CODE (addr) == PLUS)
6413 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6415 /* Check first to see if this is a constant offset from a @GOTOFF
6416 symbol reference. */
6417 if (local_symbolic_operand (op0, Pmode)
6418 && GET_CODE (op1) == CONST_INT)
6420 if (!TARGET_64BIT)
6422 if (reload_in_progress)
6423 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6424 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6425 UNSPEC_GOTOFF);
6426 new = gen_rtx_PLUS (Pmode, new, op1);
6427 new = gen_rtx_CONST (Pmode, new);
6428 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6430 if (reg != 0)
6432 emit_move_insn (reg, new);
6433 new = reg;
6436 else
6438 if (INTVAL (op1) < -16*1024*1024
6439 || INTVAL (op1) >= 16*1024*1024)
6440 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6443 else
6445 base = legitimize_pic_address (XEXP (addr, 0), reg);
6446 new = legitimize_pic_address (XEXP (addr, 1),
6447 base == reg ? NULL_RTX : reg);
6449 if (GET_CODE (new) == CONST_INT)
6450 new = plus_constant (base, INTVAL (new));
6451 else
6453 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6455 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6456 new = XEXP (new, 1);
6458 new = gen_rtx_PLUS (Pmode, base, new);
6463 return new;
6466 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6468 static rtx
6469 get_thread_pointer (int to_reg)
6471 rtx tp, reg, insn;
6473 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6474 if (!to_reg)
6475 return tp;
6477 reg = gen_reg_rtx (Pmode);
6478 insn = gen_rtx_SET (VOIDmode, reg, tp);
6479 insn = emit_insn (insn);
6481 return reg;
6484 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6485 false if we expect this to be used for a memory address and true if
6486 we expect to load the address into a register. */
6488 static rtx
6489 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6491 rtx dest, base, off, pic;
6492 int type;
6494 switch (model)
6496 case TLS_MODEL_GLOBAL_DYNAMIC:
6497 dest = gen_reg_rtx (Pmode);
6498 if (TARGET_64BIT)
6500 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6502 start_sequence ();
6503 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6504 insns = get_insns ();
6505 end_sequence ();
6507 emit_libcall_block (insns, dest, rax, x);
6509 else
6510 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6511 break;
6513 case TLS_MODEL_LOCAL_DYNAMIC:
6514 base = gen_reg_rtx (Pmode);
6515 if (TARGET_64BIT)
6517 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6519 start_sequence ();
6520 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6521 insns = get_insns ();
6522 end_sequence ();
6524 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6525 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6526 emit_libcall_block (insns, base, rax, note);
6528 else
6529 emit_insn (gen_tls_local_dynamic_base_32 (base));
6531 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6532 off = gen_rtx_CONST (Pmode, off);
6534 return gen_rtx_PLUS (Pmode, base, off);
6536 case TLS_MODEL_INITIAL_EXEC:
6537 if (TARGET_64BIT)
6539 pic = NULL;
6540 type = UNSPEC_GOTNTPOFF;
6542 else if (flag_pic)
6544 if (reload_in_progress)
6545 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6546 pic = pic_offset_table_rtx;
6547 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6549 else if (!TARGET_GNU_TLS)
6551 pic = gen_reg_rtx (Pmode);
6552 emit_insn (gen_set_got (pic));
6553 type = UNSPEC_GOTTPOFF;
6555 else
6557 pic = NULL;
6558 type = UNSPEC_INDNTPOFF;
6561 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6562 off = gen_rtx_CONST (Pmode, off);
6563 if (pic)
6564 off = gen_rtx_PLUS (Pmode, pic, off);
6565 off = gen_rtx_MEM (Pmode, off);
6566 RTX_UNCHANGING_P (off) = 1;
6567 set_mem_alias_set (off, ix86_GOT_alias_set ());
6569 if (TARGET_64BIT || TARGET_GNU_TLS)
6571 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6572 off = force_reg (Pmode, off);
6573 return gen_rtx_PLUS (Pmode, base, off);
6575 else
6577 base = get_thread_pointer (true);
6578 dest = gen_reg_rtx (Pmode);
6579 emit_insn (gen_subsi3 (dest, base, off));
6581 break;
6583 case TLS_MODEL_LOCAL_EXEC:
6584 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6585 (TARGET_64BIT || TARGET_GNU_TLS)
6586 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6587 off = gen_rtx_CONST (Pmode, off);
6589 if (TARGET_64BIT || TARGET_GNU_TLS)
6591 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6592 return gen_rtx_PLUS (Pmode, base, off);
6594 else
6596 base = get_thread_pointer (true);
6597 dest = gen_reg_rtx (Pmode);
6598 emit_insn (gen_subsi3 (dest, base, off));
6600 break;
6602 default:
6603 abort ();
6606 return dest;
6609 /* Try machine-dependent ways of modifying an illegitimate address
6610 to be legitimate. If we find one, return the new, valid address.
6611 This macro is used in only one place: `memory_address' in explow.c.
6613 OLDX is the address as it was before break_out_memory_refs was called.
6614 In some cases it is useful to look at this to decide what needs to be done.
6616 MODE and WIN are passed so that this macro can use
6617 GO_IF_LEGITIMATE_ADDRESS.
6619 It is always safe for this macro to do nothing. It exists to recognize
6620 opportunities to optimize the output.
6622 For the 80386, we handle X+REG by loading X into a register R and
6623 using R+REG. R will go in a general reg and indexing will be used.
6624 However, if REG is a broken-out memory address or multiplication,
6625 nothing needs to be done because REG can certainly go in a general reg.
6627 When -fpic is used, special handling is needed for symbolic references.
6628 See comments by legitimize_pic_address in i386.c for details. */
6631 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6633 int changed = 0;
6634 unsigned log;
6636 if (TARGET_DEBUG_ADDR)
6638 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6639 GET_MODE_NAME (mode));
6640 debug_rtx (x);
6643 log = tls_symbolic_operand (x, mode);
6644 if (log)
6645 return legitimize_tls_address (x, log, false);
6646 if (GET_CODE (x) == CONST
6647 && GET_CODE (XEXP (x, 0)) == PLUS
6648 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6650 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6651 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6654 if (flag_pic && SYMBOLIC_CONST (x))
6655 return legitimize_pic_address (x, 0);
6657 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6658 if (GET_CODE (x) == ASHIFT
6659 && GET_CODE (XEXP (x, 1)) == CONST_INT
6660 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6662 changed = 1;
6663 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6664 GEN_INT (1 << log));
6667 if (GET_CODE (x) == PLUS)
6669 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6671 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6672 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6673 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6675 changed = 1;
6676 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6677 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6678 GEN_INT (1 << log));
6681 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6682 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6683 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6685 changed = 1;
6686 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6687 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6688 GEN_INT (1 << log));
6691 /* Put multiply first if it isn't already. */
6692 if (GET_CODE (XEXP (x, 1)) == MULT)
6694 rtx tmp = XEXP (x, 0);
6695 XEXP (x, 0) = XEXP (x, 1);
6696 XEXP (x, 1) = tmp;
6697 changed = 1;
6700 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6701 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6702 created by virtual register instantiation, register elimination, and
6703 similar optimizations. */
6704 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6706 changed = 1;
6707 x = gen_rtx_PLUS (Pmode,
6708 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6709 XEXP (XEXP (x, 1), 0)),
6710 XEXP (XEXP (x, 1), 1));
6713 /* Canonicalize
6714 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6715 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6716 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6718 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6719 && CONSTANT_P (XEXP (x, 1)))
6721 rtx constant;
6722 rtx other = NULL_RTX;
6724 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6726 constant = XEXP (x, 1);
6727 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6729 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6731 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6732 other = XEXP (x, 1);
6734 else
6735 constant = 0;
6737 if (constant)
6739 changed = 1;
6740 x = gen_rtx_PLUS (Pmode,
6741 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6742 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6743 plus_constant (other, INTVAL (constant)));
6747 if (changed && legitimate_address_p (mode, x, FALSE))
6748 return x;
6750 if (GET_CODE (XEXP (x, 0)) == MULT)
6752 changed = 1;
6753 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6756 if (GET_CODE (XEXP (x, 1)) == MULT)
6758 changed = 1;
6759 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6762 if (changed
6763 && GET_CODE (XEXP (x, 1)) == REG
6764 && GET_CODE (XEXP (x, 0)) == REG)
6765 return x;
6767 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6769 changed = 1;
6770 x = legitimize_pic_address (x, 0);
6773 if (changed && legitimate_address_p (mode, x, FALSE))
6774 return x;
6776 if (GET_CODE (XEXP (x, 0)) == REG)
6778 rtx temp = gen_reg_rtx (Pmode);
6779 rtx val = force_operand (XEXP (x, 1), temp);
6780 if (val != temp)
6781 emit_move_insn (temp, val);
6783 XEXP (x, 1) = temp;
6784 return x;
6787 else if (GET_CODE (XEXP (x, 1)) == REG)
6789 rtx temp = gen_reg_rtx (Pmode);
6790 rtx val = force_operand (XEXP (x, 0), temp);
6791 if (val != temp)
6792 emit_move_insn (temp, val);
6794 XEXP (x, 0) = temp;
6795 return x;
6799 return x;
6802 /* Print an integer constant expression in assembler syntax. Addition
6803 and subtraction are the only arithmetic that may appear in these
6804 expressions. FILE is the stdio stream to write to, X is the rtx, and
6805 CODE is the operand print code from the output string. */
6807 static void
6808 output_pic_addr_const (FILE *file, rtx x, int code)
6810 char buf[256];
6812 switch (GET_CODE (x))
6814 case PC:
6815 if (flag_pic)
6816 putc ('.', file);
6817 else
6818 abort ();
6819 break;
6821 case SYMBOL_REF:
6822 /* Mark the decl as referenced so that cgraph will output the function. */
6823 if (SYMBOL_REF_DECL (x))
6824 mark_decl_referenced (SYMBOL_REF_DECL (x));
6826 assemble_name (file, XSTR (x, 0));
6827 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6828 fputs ("@PLT", file);
6829 break;
6831 case LABEL_REF:
6832 x = XEXP (x, 0);
6833 /* FALLTHRU */
6834 case CODE_LABEL:
6835 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6836 assemble_name (asm_out_file, buf);
6837 break;
6839 case CONST_INT:
6840 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6841 break;
6843 case CONST:
6844 /* This used to output parentheses around the expression,
6845 but that does not work on the 386 (either ATT or BSD assembler). */
6846 output_pic_addr_const (file, XEXP (x, 0), code);
6847 break;
6849 case CONST_DOUBLE:
6850 if (GET_MODE (x) == VOIDmode)
6852 /* We can use %d if the number is <32 bits and positive. */
6853 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6854 fprintf (file, "0x%lx%08lx",
6855 (unsigned long) CONST_DOUBLE_HIGH (x),
6856 (unsigned long) CONST_DOUBLE_LOW (x));
6857 else
6858 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6860 else
6861 /* We can't handle floating point constants;
6862 PRINT_OPERAND must handle them. */
6863 output_operand_lossage ("floating constant misused");
6864 break;
6866 case PLUS:
6867 /* Some assemblers need integer constants to appear first. */
6868 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6870 output_pic_addr_const (file, XEXP (x, 0), code);
6871 putc ('+', file);
6872 output_pic_addr_const (file, XEXP (x, 1), code);
6874 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6876 output_pic_addr_const (file, XEXP (x, 1), code);
6877 putc ('+', file);
6878 output_pic_addr_const (file, XEXP (x, 0), code);
6880 else
6881 abort ();
6882 break;
6884 case MINUS:
6885 if (!TARGET_MACHO)
6886 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6887 output_pic_addr_const (file, XEXP (x, 0), code);
6888 putc ('-', file);
6889 output_pic_addr_const (file, XEXP (x, 1), code);
6890 if (!TARGET_MACHO)
6891 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6892 break;
6894 case UNSPEC:
6895 if (XVECLEN (x, 0) != 1)
6896 abort ();
6897 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6898 switch (XINT (x, 1))
6900 case UNSPEC_GOT:
6901 fputs ("@GOT", file);
6902 break;
6903 case UNSPEC_GOTOFF:
6904 fputs ("@GOTOFF", file);
6905 break;
6906 case UNSPEC_GOTPCREL:
6907 fputs ("@GOTPCREL(%rip)", file);
6908 break;
6909 case UNSPEC_GOTTPOFF:
6910 /* FIXME: This might be @TPOFF in Sun ld too. */
6911 fputs ("@GOTTPOFF", file);
6912 break;
6913 case UNSPEC_TPOFF:
6914 fputs ("@TPOFF", file);
6915 break;
6916 case UNSPEC_NTPOFF:
6917 if (TARGET_64BIT)
6918 fputs ("@TPOFF", file);
6919 else
6920 fputs ("@NTPOFF", file);
6921 break;
6922 case UNSPEC_DTPOFF:
6923 fputs ("@DTPOFF", file);
6924 break;
6925 case UNSPEC_GOTNTPOFF:
6926 if (TARGET_64BIT)
6927 fputs ("@GOTTPOFF(%rip)", file);
6928 else
6929 fputs ("@GOTNTPOFF", file);
6930 break;
6931 case UNSPEC_INDNTPOFF:
6932 fputs ("@INDNTPOFF", file);
6933 break;
6934 default:
6935 output_operand_lossage ("invalid UNSPEC as operand");
6936 break;
6938 break;
6940 default:
6941 output_operand_lossage ("invalid expression as operand");
6945 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6946 We need to handle our special PIC relocations. */
6948 void
6949 i386_dwarf_output_addr_const (FILE *file, rtx x)
6951 #ifdef ASM_QUAD
6952 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6953 #else
6954 if (TARGET_64BIT)
6955 abort ();
6956 fprintf (file, "%s", ASM_LONG);
6957 #endif
6958 if (flag_pic)
6959 output_pic_addr_const (file, x, '\0');
6960 else
6961 output_addr_const (file, x);
6962 fputc ('\n', file);
6965 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6966 We need to emit DTP-relative relocations. */
6968 void
6969 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6971 fputs (ASM_LONG, file);
6972 output_addr_const (file, x);
6973 fputs ("@DTPOFF", file);
6974 switch (size)
6976 case 4:
6977 break;
6978 case 8:
6979 fputs (", 0", file);
6980 break;
6981 default:
6982 abort ();
6986 /* In the name of slightly smaller debug output, and to cater to
6987 general assembler losage, recognize PIC+GOTOFF and turn it back
6988 into a direct symbol reference. */
6990 static rtx
6991 ix86_delegitimize_address (rtx orig_x)
6993 rtx x = orig_x, y;
6995 if (GET_CODE (x) == MEM)
6996 x = XEXP (x, 0);
6998 if (TARGET_64BIT)
7000 if (GET_CODE (x) != CONST
7001 || GET_CODE (XEXP (x, 0)) != UNSPEC
7002 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7003 || GET_CODE (orig_x) != MEM)
7004 return orig_x;
7005 return XVECEXP (XEXP (x, 0), 0, 0);
7008 if (GET_CODE (x) != PLUS
7009 || GET_CODE (XEXP (x, 1)) != CONST)
7010 return orig_x;
7012 if (GET_CODE (XEXP (x, 0)) == REG
7013 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7014 /* %ebx + GOT/GOTOFF */
7015 y = NULL;
7016 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7018 /* %ebx + %reg * scale + GOT/GOTOFF */
7019 y = XEXP (x, 0);
7020 if (GET_CODE (XEXP (y, 0)) == REG
7021 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7022 y = XEXP (y, 1);
7023 else if (GET_CODE (XEXP (y, 1)) == REG
7024 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7025 y = XEXP (y, 0);
7026 else
7027 return orig_x;
7028 if (GET_CODE (y) != REG
7029 && GET_CODE (y) != MULT
7030 && GET_CODE (y) != ASHIFT)
7031 return orig_x;
7033 else
7034 return orig_x;
7036 x = XEXP (XEXP (x, 1), 0);
7037 if (GET_CODE (x) == UNSPEC
7038 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7039 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7041 if (y)
7042 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7043 return XVECEXP (x, 0, 0);
7046 if (GET_CODE (x) == PLUS
7047 && GET_CODE (XEXP (x, 0)) == UNSPEC
7048 && GET_CODE (XEXP (x, 1)) == CONST_INT
7049 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7050 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7051 && GET_CODE (orig_x) != MEM)))
7053 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7054 if (y)
7055 return gen_rtx_PLUS (Pmode, y, x);
7056 return x;
7059 return orig_x;
7062 static void
7063 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7064 int fp, FILE *file)
7066 const char *suffix;
7068 if (mode == CCFPmode || mode == CCFPUmode)
7070 enum rtx_code second_code, bypass_code;
7071 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7072 if (bypass_code != NIL || second_code != NIL)
7073 abort ();
7074 code = ix86_fp_compare_code_to_integer (code);
7075 mode = CCmode;
7077 if (reverse)
7078 code = reverse_condition (code);
7080 switch (code)
7082 case EQ:
7083 suffix = "e";
7084 break;
7085 case NE:
7086 suffix = "ne";
7087 break;
7088 case GT:
7089 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7090 abort ();
7091 suffix = "g";
7092 break;
7093 case GTU:
7094 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7095 Those same assemblers have the same but opposite losage on cmov. */
7096 if (mode != CCmode)
7097 abort ();
7098 suffix = fp ? "nbe" : "a";
7099 break;
7100 case LT:
7101 if (mode == CCNOmode || mode == CCGOCmode)
7102 suffix = "s";
7103 else if (mode == CCmode || mode == CCGCmode)
7104 suffix = "l";
7105 else
7106 abort ();
7107 break;
7108 case LTU:
7109 if (mode != CCmode)
7110 abort ();
7111 suffix = "b";
7112 break;
7113 case GE:
7114 if (mode == CCNOmode || mode == CCGOCmode)
7115 suffix = "ns";
7116 else if (mode == CCmode || mode == CCGCmode)
7117 suffix = "ge";
7118 else
7119 abort ();
7120 break;
7121 case GEU:
7122 /* ??? As above. */
7123 if (mode != CCmode)
7124 abort ();
7125 suffix = fp ? "nb" : "ae";
7126 break;
7127 case LE:
7128 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7129 abort ();
7130 suffix = "le";
7131 break;
7132 case LEU:
7133 if (mode != CCmode)
7134 abort ();
7135 suffix = "be";
7136 break;
7137 case UNORDERED:
7138 suffix = fp ? "u" : "p";
7139 break;
7140 case ORDERED:
7141 suffix = fp ? "nu" : "np";
7142 break;
7143 default:
7144 abort ();
7146 fputs (suffix, file);
7149 /* Print the name of register X to FILE based on its machine mode and number.
7150 If CODE is 'w', pretend the mode is HImode.
7151 If CODE is 'b', pretend the mode is QImode.
7152 If CODE is 'k', pretend the mode is SImode.
7153 If CODE is 'q', pretend the mode is DImode.
7154 If CODE is 'h', pretend the reg is the `high' byte register.
7155 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7157 void
7158 print_reg (rtx x, int code, FILE *file)
7160 if (REGNO (x) == ARG_POINTER_REGNUM
7161 || REGNO (x) == FRAME_POINTER_REGNUM
7162 || REGNO (x) == FLAGS_REG
7163 || REGNO (x) == FPSR_REG)
7164 abort ();
7166 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7167 putc ('%', file);
7169 if (code == 'w' || MMX_REG_P (x))
7170 code = 2;
7171 else if (code == 'b')
7172 code = 1;
7173 else if (code == 'k')
7174 code = 4;
7175 else if (code == 'q')
7176 code = 8;
7177 else if (code == 'y')
7178 code = 3;
7179 else if (code == 'h')
7180 code = 0;
7181 else
7182 code = GET_MODE_SIZE (GET_MODE (x));
7184 /* Irritatingly, AMD extended registers use different naming convention
7185 from the normal registers. */
7186 if (REX_INT_REG_P (x))
7188 if (!TARGET_64BIT)
7189 abort ();
7190 switch (code)
7192 case 0:
7193 error ("extended registers have no high halves");
7194 break;
7195 case 1:
7196 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7197 break;
7198 case 2:
7199 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7200 break;
7201 case 4:
7202 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7203 break;
7204 case 8:
7205 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7206 break;
7207 default:
7208 error ("unsupported operand size for extended register");
7209 break;
7211 return;
7213 switch (code)
7215 case 3:
7216 if (STACK_TOP_P (x))
7218 fputs ("st(0)", file);
7219 break;
7221 /* FALLTHRU */
7222 case 8:
7223 case 4:
7224 case 12:
7225 if (! ANY_FP_REG_P (x))
7226 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7227 /* FALLTHRU */
7228 case 16:
7229 case 2:
7230 normal:
7231 fputs (hi_reg_name[REGNO (x)], file);
7232 break;
7233 case 1:
7234 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7235 goto normal;
7236 fputs (qi_reg_name[REGNO (x)], file);
7237 break;
7238 case 0:
7239 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7240 goto normal;
7241 fputs (qi_high_reg_name[REGNO (x)], file);
7242 break;
7243 default:
7244 abort ();
7248 /* Locate some local-dynamic symbol still in use by this function
7249 so that we can print its name in some tls_local_dynamic_base
7250 pattern. */
7252 static const char *
7253 get_some_local_dynamic_name (void)
7255 rtx insn;
7257 if (cfun->machine->some_ld_name)
7258 return cfun->machine->some_ld_name;
7260 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7261 if (INSN_P (insn)
7262 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7263 return cfun->machine->some_ld_name;
7265 abort ();
7268 static int
7269 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7271 rtx x = *px;
7273 if (GET_CODE (x) == SYMBOL_REF
7274 && local_dynamic_symbolic_operand (x, Pmode))
7276 cfun->machine->some_ld_name = XSTR (x, 0);
7277 return 1;
7280 return 0;
7283 /* Meaning of CODE:
7284 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7285 C -- print opcode suffix for set/cmov insn.
7286 c -- like C, but print reversed condition
7287 F,f -- likewise, but for floating-point.
7288 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7289 otherwise nothing
7290 R -- print the prefix for register names.
7291 z -- print the opcode suffix for the size of the current operand.
7292 * -- print a star (in certain assembler syntax)
7293 A -- print an absolute memory reference.
7294 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7295 s -- print a shift double count, followed by the assemblers argument
7296 delimiter.
7297 b -- print the QImode name of the register for the indicated operand.
7298 %b0 would print %al if operands[0] is reg 0.
7299 w -- likewise, print the HImode name of the register.
7300 k -- likewise, print the SImode name of the register.
7301 q -- likewise, print the DImode name of the register.
7302 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7303 y -- print "st(0)" instead of "st" as a register.
7304 D -- print condition for SSE cmp instruction.
7305 P -- if PIC, print an @PLT suffix.
7306 X -- don't print any sort of PIC '@' suffix for a symbol.
7307 & -- print some in-use local-dynamic symbol name.
7310 void
7311 print_operand (FILE *file, rtx x, int code)
7313 if (code)
7315 switch (code)
7317 case '*':
7318 if (ASSEMBLER_DIALECT == ASM_ATT)
7319 putc ('*', file);
7320 return;
7322 case '&':
7323 assemble_name (file, get_some_local_dynamic_name ());
7324 return;
7326 case 'A':
7327 if (ASSEMBLER_DIALECT == ASM_ATT)
7328 putc ('*', file);
7329 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7331 /* Intel syntax. For absolute addresses, registers should not
7332 be surrounded by braces. */
7333 if (GET_CODE (x) != REG)
7335 putc ('[', file);
7336 PRINT_OPERAND (file, x, 0);
7337 putc (']', file);
7338 return;
7341 else
7342 abort ();
7344 PRINT_OPERAND (file, x, 0);
7345 return;
7348 case 'L':
7349 if (ASSEMBLER_DIALECT == ASM_ATT)
7350 putc ('l', file);
7351 return;
7353 case 'W':
7354 if (ASSEMBLER_DIALECT == ASM_ATT)
7355 putc ('w', file);
7356 return;
7358 case 'B':
7359 if (ASSEMBLER_DIALECT == ASM_ATT)
7360 putc ('b', file);
7361 return;
7363 case 'Q':
7364 if (ASSEMBLER_DIALECT == ASM_ATT)
7365 putc ('l', file);
7366 return;
7368 case 'S':
7369 if (ASSEMBLER_DIALECT == ASM_ATT)
7370 putc ('s', file);
7371 return;
7373 case 'T':
7374 if (ASSEMBLER_DIALECT == ASM_ATT)
7375 putc ('t', file);
7376 return;
7378 case 'z':
7379 /* 387 opcodes don't get size suffixes if the operands are
7380 registers. */
7381 if (STACK_REG_P (x))
7382 return;
7384 /* Likewise if using Intel opcodes. */
7385 if (ASSEMBLER_DIALECT == ASM_INTEL)
7386 return;
7388 /* This is the size of op from size of operand. */
7389 switch (GET_MODE_SIZE (GET_MODE (x)))
7391 case 2:
7392 #ifdef HAVE_GAS_FILDS_FISTS
7393 putc ('s', file);
7394 #endif
7395 return;
7397 case 4:
7398 if (GET_MODE (x) == SFmode)
7400 putc ('s', file);
7401 return;
7403 else
7404 putc ('l', file);
7405 return;
7407 case 12:
7408 case 16:
7409 putc ('t', file);
7410 return;
7412 case 8:
7413 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7415 #ifdef GAS_MNEMONICS
7416 putc ('q', file);
7417 #else
7418 putc ('l', file);
7419 putc ('l', file);
7420 #endif
7422 else
7423 putc ('l', file);
7424 return;
7426 default:
7427 abort ();
7430 case 'b':
7431 case 'w':
7432 case 'k':
7433 case 'q':
7434 case 'h':
7435 case 'y':
7436 case 'X':
7437 case 'P':
7438 break;
7440 case 's':
7441 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7443 PRINT_OPERAND (file, x, 0);
7444 putc (',', file);
7446 return;
7448 case 'D':
7449 /* Little bit of braindamage here. The SSE compare instructions
7450 does use completely different names for the comparisons that the
7451 fp conditional moves. */
7452 switch (GET_CODE (x))
7454 case EQ:
7455 case UNEQ:
7456 fputs ("eq", file);
7457 break;
7458 case LT:
7459 case UNLT:
7460 fputs ("lt", file);
7461 break;
7462 case LE:
7463 case UNLE:
7464 fputs ("le", file);
7465 break;
7466 case UNORDERED:
7467 fputs ("unord", file);
7468 break;
7469 case NE:
7470 case LTGT:
7471 fputs ("neq", file);
7472 break;
7473 case UNGE:
7474 case GE:
7475 fputs ("nlt", file);
7476 break;
7477 case UNGT:
7478 case GT:
7479 fputs ("nle", file);
7480 break;
7481 case ORDERED:
7482 fputs ("ord", file);
7483 break;
7484 default:
7485 abort ();
7486 break;
7488 return;
7489 case 'O':
7490 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7491 if (ASSEMBLER_DIALECT == ASM_ATT)
7493 switch (GET_MODE (x))
7495 case HImode: putc ('w', file); break;
7496 case SImode:
7497 case SFmode: putc ('l', file); break;
7498 case DImode:
7499 case DFmode: putc ('q', file); break;
7500 default: abort ();
7502 putc ('.', file);
7504 #endif
7505 return;
7506 case 'C':
7507 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7508 return;
7509 case 'F':
7510 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7511 if (ASSEMBLER_DIALECT == ASM_ATT)
7512 putc ('.', file);
7513 #endif
7514 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7515 return;
7517 /* Like above, but reverse condition */
7518 case 'c':
7519 /* Check to see if argument to %c is really a constant
7520 and not a condition code which needs to be reversed. */
7521 if (!COMPARISON_P (x))
7523 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7524 return;
7526 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7527 return;
7528 case 'f':
7529 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7530 if (ASSEMBLER_DIALECT == ASM_ATT)
7531 putc ('.', file);
7532 #endif
7533 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7534 return;
7535 case '+':
7537 rtx x;
7539 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7540 return;
7542 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7543 if (x)
7545 int pred_val = INTVAL (XEXP (x, 0));
7547 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7548 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7550 int taken = pred_val > REG_BR_PROB_BASE / 2;
7551 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7553 /* Emit hints only in the case default branch prediction
7554 heuristics would fail. */
7555 if (taken != cputaken)
7557 /* We use 3e (DS) prefix for taken branches and
7558 2e (CS) prefix for not taken branches. */
7559 if (taken)
7560 fputs ("ds ; ", file);
7561 else
7562 fputs ("cs ; ", file);
7566 return;
7568 default:
7569 output_operand_lossage ("invalid operand code `%c'", code);
7573 if (GET_CODE (x) == REG)
7574 print_reg (x, code, file);
7576 else if (GET_CODE (x) == MEM)
7578 /* No `byte ptr' prefix for call instructions. */
7579 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7581 const char * size;
7582 switch (GET_MODE_SIZE (GET_MODE (x)))
7584 case 1: size = "BYTE"; break;
7585 case 2: size = "WORD"; break;
7586 case 4: size = "DWORD"; break;
7587 case 8: size = "QWORD"; break;
7588 case 12: size = "XWORD"; break;
7589 case 16: size = "XMMWORD"; break;
7590 default:
7591 abort ();
7594 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7595 if (code == 'b')
7596 size = "BYTE";
7597 else if (code == 'w')
7598 size = "WORD";
7599 else if (code == 'k')
7600 size = "DWORD";
7602 fputs (size, file);
7603 fputs (" PTR ", file);
7606 x = XEXP (x, 0);
7607 /* Avoid (%rip) for call operands. */
7608 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7609 && GET_CODE (x) != CONST_INT)
7610 output_addr_const (file, x);
7611 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7612 output_operand_lossage ("invalid constraints for operand");
7613 else
7614 output_address (x);
7617 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7619 REAL_VALUE_TYPE r;
7620 long l;
7622 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7623 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7625 if (ASSEMBLER_DIALECT == ASM_ATT)
7626 putc ('$', file);
7627 fprintf (file, "0x%08lx", l);
7630 /* These float cases don't actually occur as immediate operands. */
7631 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7633 char dstr[30];
7635 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7636 fprintf (file, "%s", dstr);
7639 else if (GET_CODE (x) == CONST_DOUBLE
7640 && GET_MODE (x) == XFmode)
7642 char dstr[30];
7644 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7645 fprintf (file, "%s", dstr);
7648 else
7650 if (code != 'P')
7652 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7654 if (ASSEMBLER_DIALECT == ASM_ATT)
7655 putc ('$', file);
7657 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7658 || GET_CODE (x) == LABEL_REF)
7660 if (ASSEMBLER_DIALECT == ASM_ATT)
7661 putc ('$', file);
7662 else
7663 fputs ("OFFSET FLAT:", file);
7666 if (GET_CODE (x) == CONST_INT)
7667 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7668 else if (flag_pic)
7669 output_pic_addr_const (file, x, code);
7670 else
7671 output_addr_const (file, x);
7675 /* Print a memory operand whose address is ADDR. */
7677 void
7678 print_operand_address (FILE *file, rtx addr)
7680 struct ix86_address parts;
7681 rtx base, index, disp;
7682 int scale;
7684 if (! ix86_decompose_address (addr, &parts))
7685 abort ();
7687 base = parts.base;
7688 index = parts.index;
7689 disp = parts.disp;
7690 scale = parts.scale;
7692 switch (parts.seg)
7694 case SEG_DEFAULT:
7695 break;
7696 case SEG_FS:
7697 case SEG_GS:
7698 if (USER_LABEL_PREFIX[0] == 0)
7699 putc ('%', file);
7700 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7701 break;
7702 default:
7703 abort ();
7706 if (!base && !index)
7708 /* Displacement only requires special attention. */
7710 if (GET_CODE (disp) == CONST_INT)
7712 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7714 if (USER_LABEL_PREFIX[0] == 0)
7715 putc ('%', file);
7716 fputs ("ds:", file);
7718 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7720 else if (flag_pic)
7721 output_pic_addr_const (file, disp, 0);
7722 else
7723 output_addr_const (file, disp);
7725 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7726 if (TARGET_64BIT
7727 && ((GET_CODE (disp) == SYMBOL_REF
7728 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7729 || GET_CODE (disp) == LABEL_REF
7730 || (GET_CODE (disp) == CONST
7731 && GET_CODE (XEXP (disp, 0)) == PLUS
7732 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7733 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7734 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7735 fputs ("(%rip)", file);
7737 else
7739 if (ASSEMBLER_DIALECT == ASM_ATT)
7741 if (disp)
7743 if (flag_pic)
7744 output_pic_addr_const (file, disp, 0);
7745 else if (GET_CODE (disp) == LABEL_REF)
7746 output_asm_label (disp);
7747 else
7748 output_addr_const (file, disp);
7751 putc ('(', file);
7752 if (base)
7753 print_reg (base, 0, file);
7754 if (index)
7756 putc (',', file);
7757 print_reg (index, 0, file);
7758 if (scale != 1)
7759 fprintf (file, ",%d", scale);
7761 putc (')', file);
7763 else
7765 rtx offset = NULL_RTX;
7767 if (disp)
7769 /* Pull out the offset of a symbol; print any symbol itself. */
7770 if (GET_CODE (disp) == CONST
7771 && GET_CODE (XEXP (disp, 0)) == PLUS
7772 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7774 offset = XEXP (XEXP (disp, 0), 1);
7775 disp = gen_rtx_CONST (VOIDmode,
7776 XEXP (XEXP (disp, 0), 0));
7779 if (flag_pic)
7780 output_pic_addr_const (file, disp, 0);
7781 else if (GET_CODE (disp) == LABEL_REF)
7782 output_asm_label (disp);
7783 else if (GET_CODE (disp) == CONST_INT)
7784 offset = disp;
7785 else
7786 output_addr_const (file, disp);
7789 putc ('[', file);
7790 if (base)
7792 print_reg (base, 0, file);
7793 if (offset)
7795 if (INTVAL (offset) >= 0)
7796 putc ('+', file);
7797 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7800 else if (offset)
7801 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7802 else
7803 putc ('0', file);
7805 if (index)
7807 putc ('+', file);
7808 print_reg (index, 0, file);
7809 if (scale != 1)
7810 fprintf (file, "*%d", scale);
7812 putc (']', file);
7817 bool
7818 output_addr_const_extra (FILE *file, rtx x)
7820 rtx op;
7822 if (GET_CODE (x) != UNSPEC)
7823 return false;
7825 op = XVECEXP (x, 0, 0);
7826 switch (XINT (x, 1))
7828 case UNSPEC_GOTTPOFF:
7829 output_addr_const (file, op);
7830 /* FIXME: This might be @TPOFF in Sun ld. */
7831 fputs ("@GOTTPOFF", file);
7832 break;
7833 case UNSPEC_TPOFF:
7834 output_addr_const (file, op);
7835 fputs ("@TPOFF", file);
7836 break;
7837 case UNSPEC_NTPOFF:
7838 output_addr_const (file, op);
7839 if (TARGET_64BIT)
7840 fputs ("@TPOFF", file);
7841 else
7842 fputs ("@NTPOFF", file);
7843 break;
7844 case UNSPEC_DTPOFF:
7845 output_addr_const (file, op);
7846 fputs ("@DTPOFF", file);
7847 break;
7848 case UNSPEC_GOTNTPOFF:
7849 output_addr_const (file, op);
7850 if (TARGET_64BIT)
7851 fputs ("@GOTTPOFF(%rip)", file);
7852 else
7853 fputs ("@GOTNTPOFF", file);
7854 break;
7855 case UNSPEC_INDNTPOFF:
7856 output_addr_const (file, op);
7857 fputs ("@INDNTPOFF", file);
7858 break;
7860 default:
7861 return false;
7864 return true;
7867 /* Split one or more DImode RTL references into pairs of SImode
7868 references. The RTL can be REG, offsettable MEM, integer constant, or
7869 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7870 split and "num" is its length. lo_half and hi_half are output arrays
7871 that parallel "operands". */
7873 void
7874 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7876 while (num--)
7878 rtx op = operands[num];
7880 /* simplify_subreg refuse to split volatile memory addresses,
7881 but we still have to handle it. */
7882 if (GET_CODE (op) == MEM)
7884 lo_half[num] = adjust_address (op, SImode, 0);
7885 hi_half[num] = adjust_address (op, SImode, 4);
7887 else
7889 lo_half[num] = simplify_gen_subreg (SImode, op,
7890 GET_MODE (op) == VOIDmode
7891 ? DImode : GET_MODE (op), 0);
7892 hi_half[num] = simplify_gen_subreg (SImode, op,
7893 GET_MODE (op) == VOIDmode
7894 ? DImode : GET_MODE (op), 4);
7898 /* Split one or more TImode RTL references into pairs of SImode
7899 references. The RTL can be REG, offsettable MEM, integer constant, or
7900 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7901 split and "num" is its length. lo_half and hi_half are output arrays
7902 that parallel "operands". */
7904 void
7905 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7907 while (num--)
7909 rtx op = operands[num];
7911 /* simplify_subreg refuse to split volatile memory addresses, but we
7912 still have to handle it. */
7913 if (GET_CODE (op) == MEM)
7915 lo_half[num] = adjust_address (op, DImode, 0);
7916 hi_half[num] = adjust_address (op, DImode, 8);
7918 else
7920 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7921 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7926 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7927 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7928 is the expression of the binary operation. The output may either be
7929 emitted here, or returned to the caller, like all output_* functions.
7931 There is no guarantee that the operands are the same mode, as they
7932 might be within FLOAT or FLOAT_EXTEND expressions. */
7934 #ifndef SYSV386_COMPAT
7935 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7936 wants to fix the assemblers because that causes incompatibility
7937 with gcc. No-one wants to fix gcc because that causes
7938 incompatibility with assemblers... You can use the option of
7939 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7940 #define SYSV386_COMPAT 1
7941 #endif
7943 const char *
7944 output_387_binary_op (rtx insn, rtx *operands)
7946 static char buf[30];
7947 const char *p;
7948 const char *ssep;
7949 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7951 #ifdef ENABLE_CHECKING
7952 /* Even if we do not want to check the inputs, this documents input
7953 constraints. Which helps in understanding the following code. */
7954 if (STACK_REG_P (operands[0])
7955 && ((REG_P (operands[1])
7956 && REGNO (operands[0]) == REGNO (operands[1])
7957 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7958 || (REG_P (operands[2])
7959 && REGNO (operands[0]) == REGNO (operands[2])
7960 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7961 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7962 ; /* ok */
7963 else if (!is_sse)
7964 abort ();
7965 #endif
7967 switch (GET_CODE (operands[3]))
7969 case PLUS:
7970 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7971 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7972 p = "fiadd";
7973 else
7974 p = "fadd";
7975 ssep = "add";
7976 break;
7978 case MINUS:
7979 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7980 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7981 p = "fisub";
7982 else
7983 p = "fsub";
7984 ssep = "sub";
7985 break;
7987 case MULT:
7988 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7989 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7990 p = "fimul";
7991 else
7992 p = "fmul";
7993 ssep = "mul";
7994 break;
7996 case DIV:
7997 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7998 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7999 p = "fidiv";
8000 else
8001 p = "fdiv";
8002 ssep = "div";
8003 break;
8005 default:
8006 abort ();
8009 if (is_sse)
8011 strcpy (buf, ssep);
8012 if (GET_MODE (operands[0]) == SFmode)
8013 strcat (buf, "ss\t{%2, %0|%0, %2}");
8014 else
8015 strcat (buf, "sd\t{%2, %0|%0, %2}");
8016 return buf;
8018 strcpy (buf, p);
8020 switch (GET_CODE (operands[3]))
8022 case MULT:
8023 case PLUS:
8024 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8026 rtx temp = operands[2];
8027 operands[2] = operands[1];
8028 operands[1] = temp;
8031 /* know operands[0] == operands[1]. */
8033 if (GET_CODE (operands[2]) == MEM)
8035 p = "%z2\t%2";
8036 break;
8039 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8041 if (STACK_TOP_P (operands[0]))
8042 /* How is it that we are storing to a dead operand[2]?
8043 Well, presumably operands[1] is dead too. We can't
8044 store the result to st(0) as st(0) gets popped on this
8045 instruction. Instead store to operands[2] (which I
8046 think has to be st(1)). st(1) will be popped later.
8047 gcc <= 2.8.1 didn't have this check and generated
8048 assembly code that the Unixware assembler rejected. */
8049 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8050 else
8051 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8052 break;
8055 if (STACK_TOP_P (operands[0]))
8056 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8057 else
8058 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8059 break;
8061 case MINUS:
8062 case DIV:
8063 if (GET_CODE (operands[1]) == MEM)
8065 p = "r%z1\t%1";
8066 break;
8069 if (GET_CODE (operands[2]) == MEM)
8071 p = "%z2\t%2";
8072 break;
8075 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8077 #if SYSV386_COMPAT
8078 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8079 derived assemblers, confusingly reverse the direction of
8080 the operation for fsub{r} and fdiv{r} when the
8081 destination register is not st(0). The Intel assembler
8082 doesn't have this brain damage. Read !SYSV386_COMPAT to
8083 figure out what the hardware really does. */
8084 if (STACK_TOP_P (operands[0]))
8085 p = "{p\t%0, %2|rp\t%2, %0}";
8086 else
8087 p = "{rp\t%2, %0|p\t%0, %2}";
8088 #else
8089 if (STACK_TOP_P (operands[0]))
8090 /* As above for fmul/fadd, we can't store to st(0). */
8091 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8092 else
8093 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8094 #endif
8095 break;
8098 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8100 #if SYSV386_COMPAT
8101 if (STACK_TOP_P (operands[0]))
8102 p = "{rp\t%0, %1|p\t%1, %0}";
8103 else
8104 p = "{p\t%1, %0|rp\t%0, %1}";
8105 #else
8106 if (STACK_TOP_P (operands[0]))
8107 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8108 else
8109 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8110 #endif
8111 break;
8114 if (STACK_TOP_P (operands[0]))
8116 if (STACK_TOP_P (operands[1]))
8117 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8118 else
8119 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8120 break;
8122 else if (STACK_TOP_P (operands[1]))
8124 #if SYSV386_COMPAT
8125 p = "{\t%1, %0|r\t%0, %1}";
8126 #else
8127 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8128 #endif
8130 else
8132 #if SYSV386_COMPAT
8133 p = "{r\t%2, %0|\t%0, %2}";
8134 #else
8135 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8136 #endif
8138 break;
8140 default:
8141 abort ();
8144 strcat (buf, p);
8145 return buf;
8148 /* Output code to initialize control word copies used by
8149 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8150 is set to control word rounding downwards. */
8151 void
8152 emit_i387_cw_initialization (rtx normal, rtx round_down)
8154 rtx reg = gen_reg_rtx (HImode);
8156 emit_insn (gen_x86_fnstcw_1 (normal));
8157 emit_move_insn (reg, normal);
8158 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8159 && !TARGET_64BIT)
8160 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8161 else
8162 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8163 emit_move_insn (round_down, reg);
8166 /* Output code for INSN to convert a float to a signed int. OPERANDS
8167 are the insn operands. The output may be [HSD]Imode and the input
8168 operand may be [SDX]Fmode. */
8170 const char *
8171 output_fix_trunc (rtx insn, rtx *operands)
8173 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8174 int dimode_p = GET_MODE (operands[0]) == DImode;
8176 /* Jump through a hoop or two for DImode, since the hardware has no
8177 non-popping instruction. We used to do this a different way, but
8178 that was somewhat fragile and broke with post-reload splitters. */
8179 if (dimode_p && !stack_top_dies)
8180 output_asm_insn ("fld\t%y1", operands);
8182 if (!STACK_TOP_P (operands[1]))
8183 abort ();
8185 if (GET_CODE (operands[0]) != MEM)
8186 abort ();
8188 output_asm_insn ("fldcw\t%3", operands);
8189 if (stack_top_dies || dimode_p)
8190 output_asm_insn ("fistp%z0\t%0", operands);
8191 else
8192 output_asm_insn ("fist%z0\t%0", operands);
8193 output_asm_insn ("fldcw\t%2", operands);
8195 return "";
8198 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8199 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8200 when fucom should be used. */
8202 const char *
8203 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8205 int stack_top_dies;
8206 rtx cmp_op0 = operands[0];
8207 rtx cmp_op1 = operands[1];
8208 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8210 if (eflags_p == 2)
8212 cmp_op0 = cmp_op1;
8213 cmp_op1 = operands[2];
8215 if (is_sse)
8217 if (GET_MODE (operands[0]) == SFmode)
8218 if (unordered_p)
8219 return "ucomiss\t{%1, %0|%0, %1}";
8220 else
8221 return "comiss\t{%1, %0|%0, %1}";
8222 else
8223 if (unordered_p)
8224 return "ucomisd\t{%1, %0|%0, %1}";
8225 else
8226 return "comisd\t{%1, %0|%0, %1}";
8229 if (! STACK_TOP_P (cmp_op0))
8230 abort ();
8232 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8234 if (STACK_REG_P (cmp_op1)
8235 && stack_top_dies
8236 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8237 && REGNO (cmp_op1) != FIRST_STACK_REG)
8239 /* If both the top of the 387 stack dies, and the other operand
8240 is also a stack register that dies, then this must be a
8241 `fcompp' float compare */
8243 if (eflags_p == 1)
8245 /* There is no double popping fcomi variant. Fortunately,
8246 eflags is immune from the fstp's cc clobbering. */
8247 if (unordered_p)
8248 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8249 else
8250 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8251 return "fstp\t%y0";
8253 else
8255 if (eflags_p == 2)
8257 if (unordered_p)
8258 return "fucompp\n\tfnstsw\t%0";
8259 else
8260 return "fcompp\n\tfnstsw\t%0";
8262 else
8264 if (unordered_p)
8265 return "fucompp";
8266 else
8267 return "fcompp";
8271 else
8273 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8275 static const char * const alt[24] =
8277 "fcom%z1\t%y1",
8278 "fcomp%z1\t%y1",
8279 "fucom%z1\t%y1",
8280 "fucomp%z1\t%y1",
8282 "ficom%z1\t%y1",
8283 "ficomp%z1\t%y1",
8284 NULL,
8285 NULL,
8287 "fcomi\t{%y1, %0|%0, %y1}",
8288 "fcomip\t{%y1, %0|%0, %y1}",
8289 "fucomi\t{%y1, %0|%0, %y1}",
8290 "fucomip\t{%y1, %0|%0, %y1}",
8292 NULL,
8293 NULL,
8294 NULL,
8295 NULL,
8297 "fcom%z2\t%y2\n\tfnstsw\t%0",
8298 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8299 "fucom%z2\t%y2\n\tfnstsw\t%0",
8300 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8302 "ficom%z2\t%y2\n\tfnstsw\t%0",
8303 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8304 NULL,
8305 NULL
8308 int mask;
8309 const char *ret;
8311 mask = eflags_p << 3;
8312 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8313 mask |= unordered_p << 1;
8314 mask |= stack_top_dies;
8316 if (mask >= 24)
8317 abort ();
8318 ret = alt[mask];
8319 if (ret == NULL)
8320 abort ();
8322 return ret;
8326 void
8327 ix86_output_addr_vec_elt (FILE *file, int value)
8329 const char *directive = ASM_LONG;
8331 if (TARGET_64BIT)
8333 #ifdef ASM_QUAD
8334 directive = ASM_QUAD;
8335 #else
8336 abort ();
8337 #endif
8340 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8343 void
8344 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8346 if (TARGET_64BIT)
8347 fprintf (file, "%s%s%d-%s%d\n",
8348 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8349 else if (HAVE_AS_GOTOFF_IN_DATA)
8350 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8351 #if TARGET_MACHO
8352 else if (TARGET_MACHO)
8354 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8355 machopic_output_function_base_name (file);
8356 fprintf(file, "\n");
8358 #endif
8359 else
8360 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8361 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8364 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8365 for the target. */
8367 void
8368 ix86_expand_clear (rtx dest)
8370 rtx tmp;
8372 /* We play register width games, which are only valid after reload. */
8373 if (!reload_completed)
8374 abort ();
8376 /* Avoid HImode and its attendant prefix byte. */
8377 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8378 dest = gen_rtx_REG (SImode, REGNO (dest));
8380 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8382 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8383 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8385 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8386 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8389 emit_insn (tmp);
8392 /* X is an unchanging MEM. If it is a constant pool reference, return
8393 the constant pool rtx, else NULL. */
8395 static rtx
8396 maybe_get_pool_constant (rtx x)
8398 x = ix86_delegitimize_address (XEXP (x, 0));
8400 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8401 return get_pool_constant (x);
8403 return NULL_RTX;
8406 void
8407 ix86_expand_move (enum machine_mode mode, rtx operands[])
8409 int strict = (reload_in_progress || reload_completed);
8410 rtx op0, op1;
8411 enum tls_model model;
8413 op0 = operands[0];
8414 op1 = operands[1];
8416 model = tls_symbolic_operand (op1, Pmode);
8417 if (model)
8419 op1 = legitimize_tls_address (op1, model, true);
8420 op1 = force_operand (op1, op0);
8421 if (op1 == op0)
8422 return;
8425 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8427 #if TARGET_MACHO
8428 if (MACHOPIC_PURE)
8430 rtx temp = ((reload_in_progress
8431 || ((op0 && GET_CODE (op0) == REG)
8432 && mode == Pmode))
8433 ? op0 : gen_reg_rtx (Pmode));
8434 op1 = machopic_indirect_data_reference (op1, temp);
8435 op1 = machopic_legitimize_pic_address (op1, mode,
8436 temp == op1 ? 0 : temp);
8438 else if (MACHOPIC_INDIRECT)
8439 op1 = machopic_indirect_data_reference (op1, 0);
8440 if (op0 == op1)
8441 return;
8442 #else
8443 if (GET_CODE (op0) == MEM)
8444 op1 = force_reg (Pmode, op1);
8445 else
8446 op1 = legitimize_address (op1, op1, Pmode);
8447 #endif /* TARGET_MACHO */
8449 else
8451 if (GET_CODE (op0) == MEM
8452 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8453 || !push_operand (op0, mode))
8454 && GET_CODE (op1) == MEM)
8455 op1 = force_reg (mode, op1);
8457 if (push_operand (op0, mode)
8458 && ! general_no_elim_operand (op1, mode))
8459 op1 = copy_to_mode_reg (mode, op1);
8461 /* Force large constants in 64bit compilation into register
8462 to get them CSEed. */
8463 if (TARGET_64BIT && mode == DImode
8464 && immediate_operand (op1, mode)
8465 && !x86_64_zero_extended_value (op1)
8466 && !register_operand (op0, mode)
8467 && optimize && !reload_completed && !reload_in_progress)
8468 op1 = copy_to_mode_reg (mode, op1);
8470 if (FLOAT_MODE_P (mode))
8472 /* If we are loading a floating point constant to a register,
8473 force the value to memory now, since we'll get better code
8474 out the back end. */
8476 if (strict)
8478 else if (GET_CODE (op1) == CONST_DOUBLE)
8480 op1 = validize_mem (force_const_mem (mode, op1));
8481 if (!register_operand (op0, mode))
8483 rtx temp = gen_reg_rtx (mode);
8484 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8485 emit_move_insn (op0, temp);
8486 return;
8492 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8495 void
8496 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8498 /* Force constants other than zero into memory. We do not know how
8499 the instructions used to build constants modify the upper 64 bits
8500 of the register, once we have that information we may be able
8501 to handle some of them more efficiently. */
8502 if ((reload_in_progress | reload_completed) == 0
8503 && register_operand (operands[0], mode)
8504 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8505 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8507 /* Make operand1 a register if it isn't already. */
8508 if (!no_new_pseudos
8509 && !register_operand (operands[0], mode)
8510 && !register_operand (operands[1], mode))
8512 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8513 emit_move_insn (operands[0], temp);
8514 return;
8517 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8520 /* Attempt to expand a binary operator. Make the expansion closer to the
8521 actual machine, then just general_operand, which will allow 3 separate
8522 memory references (one output, two input) in a single insn. */
8524 void
8525 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8526 rtx operands[])
8528 int matching_memory;
8529 rtx src1, src2, dst, op, clob;
8531 dst = operands[0];
8532 src1 = operands[1];
8533 src2 = operands[2];
8535 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8536 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8537 && (rtx_equal_p (dst, src2)
8538 || immediate_operand (src1, mode)))
8540 rtx temp = src1;
8541 src1 = src2;
8542 src2 = temp;
8545 /* If the destination is memory, and we do not have matching source
8546 operands, do things in registers. */
8547 matching_memory = 0;
8548 if (GET_CODE (dst) == MEM)
8550 if (rtx_equal_p (dst, src1))
8551 matching_memory = 1;
8552 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8553 && rtx_equal_p (dst, src2))
8554 matching_memory = 2;
8555 else
8556 dst = gen_reg_rtx (mode);
8559 /* Both source operands cannot be in memory. */
8560 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8562 if (matching_memory != 2)
8563 src2 = force_reg (mode, src2);
8564 else
8565 src1 = force_reg (mode, src1);
8568 /* If the operation is not commutable, source 1 cannot be a constant
8569 or non-matching memory. */
8570 if ((CONSTANT_P (src1)
8571 || (!matching_memory && GET_CODE (src1) == MEM))
8572 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8573 src1 = force_reg (mode, src1);
8575 /* If optimizing, copy to regs to improve CSE */
8576 if (optimize && ! no_new_pseudos)
8578 if (GET_CODE (dst) == MEM)
8579 dst = gen_reg_rtx (mode);
8580 if (GET_CODE (src1) == MEM)
8581 src1 = force_reg (mode, src1);
8582 if (GET_CODE (src2) == MEM)
8583 src2 = force_reg (mode, src2);
8586 /* Emit the instruction. */
8588 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8589 if (reload_in_progress)
8591 /* Reload doesn't know about the flags register, and doesn't know that
8592 it doesn't want to clobber it. We can only do this with PLUS. */
8593 if (code != PLUS)
8594 abort ();
8595 emit_insn (op);
8597 else
8599 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8600 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8603 /* Fix up the destination if needed. */
8604 if (dst != operands[0])
8605 emit_move_insn (operands[0], dst);
8608 /* Return TRUE or FALSE depending on whether the binary operator meets the
8609 appropriate constraints. */
8612 ix86_binary_operator_ok (enum rtx_code code,
8613 enum machine_mode mode ATTRIBUTE_UNUSED,
8614 rtx operands[3])
8616 /* Both source operands cannot be in memory. */
8617 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8618 return 0;
8619 /* If the operation is not commutable, source 1 cannot be a constant. */
8620 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8621 return 0;
8622 /* If the destination is memory, we must have a matching source operand. */
8623 if (GET_CODE (operands[0]) == MEM
8624 && ! (rtx_equal_p (operands[0], operands[1])
8625 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8626 && rtx_equal_p (operands[0], operands[2]))))
8627 return 0;
8628 /* If the operation is not commutable and the source 1 is memory, we must
8629 have a matching destination. */
8630 if (GET_CODE (operands[1]) == MEM
8631 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8632 && ! rtx_equal_p (operands[0], operands[1]))
8633 return 0;
8634 return 1;
8637 /* Attempt to expand a unary operator. Make the expansion closer to the
8638 actual machine, then just general_operand, which will allow 2 separate
8639 memory references (one output, one input) in a single insn. */
8641 void
8642 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8643 rtx operands[])
8645 int matching_memory;
8646 rtx src, dst, op, clob;
8648 dst = operands[0];
8649 src = operands[1];
8651 /* If the destination is memory, and we do not have matching source
8652 operands, do things in registers. */
8653 matching_memory = 0;
8654 if (GET_CODE (dst) == MEM)
8656 if (rtx_equal_p (dst, src))
8657 matching_memory = 1;
8658 else
8659 dst = gen_reg_rtx (mode);
8662 /* When source operand is memory, destination must match. */
8663 if (!matching_memory && GET_CODE (src) == MEM)
8664 src = force_reg (mode, src);
8666 /* If optimizing, copy to regs to improve CSE */
8667 if (optimize && ! no_new_pseudos)
8669 if (GET_CODE (dst) == MEM)
8670 dst = gen_reg_rtx (mode);
8671 if (GET_CODE (src) == MEM)
8672 src = force_reg (mode, src);
8675 /* Emit the instruction. */
8677 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8678 if (reload_in_progress || code == NOT)
8680 /* Reload doesn't know about the flags register, and doesn't know that
8681 it doesn't want to clobber it. */
8682 if (code != NOT)
8683 abort ();
8684 emit_insn (op);
8686 else
8688 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8689 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8692 /* Fix up the destination if needed. */
8693 if (dst != operands[0])
8694 emit_move_insn (operands[0], dst);
8697 /* Return TRUE or FALSE depending on whether the unary operator meets the
8698 appropriate constraints. */
8701 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8702 enum machine_mode mode ATTRIBUTE_UNUSED,
8703 rtx operands[2] ATTRIBUTE_UNUSED)
8705 /* If one of operands is memory, source and destination must match. */
8706 if ((GET_CODE (operands[0]) == MEM
8707 || GET_CODE (operands[1]) == MEM)
8708 && ! rtx_equal_p (operands[0], operands[1]))
8709 return FALSE;
8710 return TRUE;
8713 /* Return TRUE or FALSE depending on whether the first SET in INSN
8714 has source and destination with matching CC modes, and that the
8715 CC mode is at least as constrained as REQ_MODE. */
8718 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8720 rtx set;
8721 enum machine_mode set_mode;
8723 set = PATTERN (insn);
8724 if (GET_CODE (set) == PARALLEL)
8725 set = XVECEXP (set, 0, 0);
8726 if (GET_CODE (set) != SET)
8727 abort ();
8728 if (GET_CODE (SET_SRC (set)) != COMPARE)
8729 abort ();
8731 set_mode = GET_MODE (SET_DEST (set));
8732 switch (set_mode)
8734 case CCNOmode:
8735 if (req_mode != CCNOmode
8736 && (req_mode != CCmode
8737 || XEXP (SET_SRC (set), 1) != const0_rtx))
8738 return 0;
8739 break;
8740 case CCmode:
8741 if (req_mode == CCGCmode)
8742 return 0;
8743 /* FALLTHRU */
8744 case CCGCmode:
8745 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8746 return 0;
8747 /* FALLTHRU */
8748 case CCGOCmode:
8749 if (req_mode == CCZmode)
8750 return 0;
8751 /* FALLTHRU */
8752 case CCZmode:
8753 break;
8755 default:
8756 abort ();
8759 return (GET_MODE (SET_SRC (set)) == set_mode);
8762 /* Generate insn patterns to do an integer compare of OPERANDS. */
8764 static rtx
8765 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8767 enum machine_mode cmpmode;
8768 rtx tmp, flags;
8770 cmpmode = SELECT_CC_MODE (code, op0, op1);
8771 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8773 /* This is very simple, but making the interface the same as in the
8774 FP case makes the rest of the code easier. */
8775 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8776 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8778 /* Return the test that should be put into the flags user, i.e.
8779 the bcc, scc, or cmov instruction. */
8780 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8783 /* Figure out whether to use ordered or unordered fp comparisons.
8784 Return the appropriate mode to use. */
8786 enum machine_mode
8787 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8789 /* ??? In order to make all comparisons reversible, we do all comparisons
8790 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8791 all forms trapping and nontrapping comparisons, we can make inequality
8792 comparisons trapping again, since it results in better code when using
8793 FCOM based compares. */
8794 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8797 enum machine_mode
8798 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8800 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8801 return ix86_fp_compare_mode (code);
8802 switch (code)
8804 /* Only zero flag is needed. */
8805 case EQ: /* ZF=0 */
8806 case NE: /* ZF!=0 */
8807 return CCZmode;
8808 /* Codes needing carry flag. */
8809 case GEU: /* CF=0 */
8810 case GTU: /* CF=0 & ZF=0 */
8811 case LTU: /* CF=1 */
8812 case LEU: /* CF=1 | ZF=1 */
8813 return CCmode;
8814 /* Codes possibly doable only with sign flag when
8815 comparing against zero. */
8816 case GE: /* SF=OF or SF=0 */
8817 case LT: /* SF<>OF or SF=1 */
8818 if (op1 == const0_rtx)
8819 return CCGOCmode;
8820 else
8821 /* For other cases Carry flag is not required. */
8822 return CCGCmode;
8823 /* Codes doable only with sign flag when comparing
8824 against zero, but we miss jump instruction for it
8825 so we need to use relational tests against overflow
8826 that thus needs to be zero. */
8827 case GT: /* ZF=0 & SF=OF */
8828 case LE: /* ZF=1 | SF<>OF */
8829 if (op1 == const0_rtx)
8830 return CCNOmode;
8831 else
8832 return CCGCmode;
8833 /* strcmp pattern do (use flags) and combine may ask us for proper
8834 mode. */
8835 case USE:
8836 return CCmode;
8837 default:
8838 abort ();
8842 /* Return the fixed registers used for condition codes. */
8844 static bool
8845 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8847 *p1 = FLAGS_REG;
8848 *p2 = FPSR_REG;
8849 return true;
8852 /* If two condition code modes are compatible, return a condition code
8853 mode which is compatible with both. Otherwise, return
8854 VOIDmode. */
8856 static enum machine_mode
8857 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8859 if (m1 == m2)
8860 return m1;
8862 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8863 return VOIDmode;
8865 if ((m1 == CCGCmode && m2 == CCGOCmode)
8866 || (m1 == CCGOCmode && m2 == CCGCmode))
8867 return CCGCmode;
8869 switch (m1)
8871 default:
8872 abort ();
8874 case CCmode:
8875 case CCGCmode:
8876 case CCGOCmode:
8877 case CCNOmode:
8878 case CCZmode:
8879 switch (m2)
8881 default:
8882 return VOIDmode;
8884 case CCmode:
8885 case CCGCmode:
8886 case CCGOCmode:
8887 case CCNOmode:
8888 case CCZmode:
8889 return CCmode;
8892 case CCFPmode:
8893 case CCFPUmode:
8894 /* These are only compatible with themselves, which we already
8895 checked above. */
8896 return VOIDmode;
8900 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8903 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8905 enum rtx_code swapped_code = swap_condition (code);
8906 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8907 || (ix86_fp_comparison_cost (swapped_code)
8908 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8911 /* Swap, force into registers, or otherwise massage the two operands
8912 to a fp comparison. The operands are updated in place; the new
8913 comparison code is returned. */
8915 static enum rtx_code
8916 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8918 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8919 rtx op0 = *pop0, op1 = *pop1;
8920 enum machine_mode op_mode = GET_MODE (op0);
8921 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8923 /* All of the unordered compare instructions only work on registers.
8924 The same is true of the XFmode compare instructions. The same is
8925 true of the fcomi compare instructions. */
8927 if (!is_sse
8928 && (fpcmp_mode == CCFPUmode
8929 || op_mode == XFmode
8930 || ix86_use_fcomi_compare (code)))
8932 op0 = force_reg (op_mode, op0);
8933 op1 = force_reg (op_mode, op1);
8935 else
8937 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8938 things around if they appear profitable, otherwise force op0
8939 into a register. */
8941 if (standard_80387_constant_p (op0) == 0
8942 || (GET_CODE (op0) == MEM
8943 && ! (standard_80387_constant_p (op1) == 0
8944 || GET_CODE (op1) == MEM)))
8946 rtx tmp;
8947 tmp = op0, op0 = op1, op1 = tmp;
8948 code = swap_condition (code);
8951 if (GET_CODE (op0) != REG)
8952 op0 = force_reg (op_mode, op0);
8954 if (CONSTANT_P (op1))
8956 if (standard_80387_constant_p (op1))
8957 op1 = force_reg (op_mode, op1);
8958 else
8959 op1 = validize_mem (force_const_mem (op_mode, op1));
8963 /* Try to rearrange the comparison to make it cheaper. */
8964 if (ix86_fp_comparison_cost (code)
8965 > ix86_fp_comparison_cost (swap_condition (code))
8966 && (GET_CODE (op1) == REG || !no_new_pseudos))
8968 rtx tmp;
8969 tmp = op0, op0 = op1, op1 = tmp;
8970 code = swap_condition (code);
8971 if (GET_CODE (op0) != REG)
8972 op0 = force_reg (op_mode, op0);
8975 *pop0 = op0;
8976 *pop1 = op1;
8977 return code;
8980 /* Convert comparison codes we use to represent FP comparison to integer
8981 code that will result in proper branch. Return UNKNOWN if no such code
8982 is available. */
8983 static enum rtx_code
8984 ix86_fp_compare_code_to_integer (enum rtx_code code)
8986 switch (code)
8988 case GT:
8989 return GTU;
8990 case GE:
8991 return GEU;
8992 case ORDERED:
8993 case UNORDERED:
8994 return code;
8995 break;
8996 case UNEQ:
8997 return EQ;
8998 break;
8999 case UNLT:
9000 return LTU;
9001 break;
9002 case UNLE:
9003 return LEU;
9004 break;
9005 case LTGT:
9006 return NE;
9007 break;
9008 default:
9009 return UNKNOWN;
9013 /* Split comparison code CODE into comparisons we can do using branch
9014 instructions. BYPASS_CODE is comparison code for branch that will
9015 branch around FIRST_CODE and SECOND_CODE. If some of branches
9016 is not required, set value to NIL.
9017 We never require more than two branches. */
9018 static void
9019 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9020 enum rtx_code *first_code,
9021 enum rtx_code *second_code)
9023 *first_code = code;
9024 *bypass_code = NIL;
9025 *second_code = NIL;
9027 /* The fcomi comparison sets flags as follows:
9029 cmp ZF PF CF
9030 > 0 0 0
9031 < 0 0 1
9032 = 1 0 0
9033 un 1 1 1 */
9035 switch (code)
9037 case GT: /* GTU - CF=0 & ZF=0 */
9038 case GE: /* GEU - CF=0 */
9039 case ORDERED: /* PF=0 */
9040 case UNORDERED: /* PF=1 */
9041 case UNEQ: /* EQ - ZF=1 */
9042 case UNLT: /* LTU - CF=1 */
9043 case UNLE: /* LEU - CF=1 | ZF=1 */
9044 case LTGT: /* EQ - ZF=0 */
9045 break;
9046 case LT: /* LTU - CF=1 - fails on unordered */
9047 *first_code = UNLT;
9048 *bypass_code = UNORDERED;
9049 break;
9050 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9051 *first_code = UNLE;
9052 *bypass_code = UNORDERED;
9053 break;
9054 case EQ: /* EQ - ZF=1 - fails on unordered */
9055 *first_code = UNEQ;
9056 *bypass_code = UNORDERED;
9057 break;
9058 case NE: /* NE - ZF=0 - fails on unordered */
9059 *first_code = LTGT;
9060 *second_code = UNORDERED;
9061 break;
9062 case UNGE: /* GEU - CF=0 - fails on unordered */
9063 *first_code = GE;
9064 *second_code = UNORDERED;
9065 break;
9066 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9067 *first_code = GT;
9068 *second_code = UNORDERED;
9069 break;
9070 default:
9071 abort ();
9073 if (!TARGET_IEEE_FP)
9075 *second_code = NIL;
9076 *bypass_code = NIL;
9080 /* Return cost of comparison done fcom + arithmetics operations on AX.
9081 All following functions do use number of instructions as a cost metrics.
9082 In future this should be tweaked to compute bytes for optimize_size and
9083 take into account performance of various instructions on various CPUs. */
9084 static int
9085 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9087 if (!TARGET_IEEE_FP)
9088 return 4;
9089 /* The cost of code output by ix86_expand_fp_compare. */
9090 switch (code)
9092 case UNLE:
9093 case UNLT:
9094 case LTGT:
9095 case GT:
9096 case GE:
9097 case UNORDERED:
9098 case ORDERED:
9099 case UNEQ:
9100 return 4;
9101 break;
9102 case LT:
9103 case NE:
9104 case EQ:
9105 case UNGE:
9106 return 5;
9107 break;
9108 case LE:
9109 case UNGT:
9110 return 6;
9111 break;
9112 default:
9113 abort ();
9117 /* Return cost of comparison done using fcomi operation.
9118 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9119 static int
9120 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9122 enum rtx_code bypass_code, first_code, second_code;
9123 /* Return arbitrarily high cost when instruction is not supported - this
9124 prevents gcc from using it. */
9125 if (!TARGET_CMOVE)
9126 return 1024;
9127 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9128 return (bypass_code != NIL || second_code != NIL) + 2;
9131 /* Return cost of comparison done using sahf operation.
9132 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9133 static int
9134 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9136 enum rtx_code bypass_code, first_code, second_code;
9137 /* Return arbitrarily high cost when instruction is not preferred - this
9138 avoids gcc from using it. */
9139 if (!TARGET_USE_SAHF && !optimize_size)
9140 return 1024;
9141 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9142 return (bypass_code != NIL || second_code != NIL) + 3;
9145 /* Compute cost of the comparison done using any method.
9146 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9147 static int
9148 ix86_fp_comparison_cost (enum rtx_code code)
9150 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9151 int min;
9153 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9154 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9156 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9157 if (min > sahf_cost)
9158 min = sahf_cost;
9159 if (min > fcomi_cost)
9160 min = fcomi_cost;
9161 return min;
9164 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9166 static rtx
9167 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9168 rtx *second_test, rtx *bypass_test)
9170 enum machine_mode fpcmp_mode, intcmp_mode;
9171 rtx tmp, tmp2;
9172 int cost = ix86_fp_comparison_cost (code);
9173 enum rtx_code bypass_code, first_code, second_code;
9175 fpcmp_mode = ix86_fp_compare_mode (code);
9176 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9178 if (second_test)
9179 *second_test = NULL_RTX;
9180 if (bypass_test)
9181 *bypass_test = NULL_RTX;
9183 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9185 /* Do fcomi/sahf based test when profitable. */
9186 if ((bypass_code == NIL || bypass_test)
9187 && (second_code == NIL || second_test)
9188 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9190 if (TARGET_CMOVE)
9192 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9193 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9194 tmp);
9195 emit_insn (tmp);
9197 else
9199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9200 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9201 if (!scratch)
9202 scratch = gen_reg_rtx (HImode);
9203 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9204 emit_insn (gen_x86_sahf_1 (scratch));
9207 /* The FP codes work out to act like unsigned. */
9208 intcmp_mode = fpcmp_mode;
9209 code = first_code;
9210 if (bypass_code != NIL)
9211 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9212 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9213 const0_rtx);
9214 if (second_code != NIL)
9215 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9216 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9217 const0_rtx);
9219 else
9221 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9222 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9223 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9224 if (!scratch)
9225 scratch = gen_reg_rtx (HImode);
9226 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9228 /* In the unordered case, we have to check C2 for NaN's, which
9229 doesn't happen to work out to anything nice combination-wise.
9230 So do some bit twiddling on the value we've got in AH to come
9231 up with an appropriate set of condition codes. */
9233 intcmp_mode = CCNOmode;
9234 switch (code)
9236 case GT:
9237 case UNGT:
9238 if (code == GT || !TARGET_IEEE_FP)
9240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9241 code = EQ;
9243 else
9245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9246 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9247 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9248 intcmp_mode = CCmode;
9249 code = GEU;
9251 break;
9252 case LT:
9253 case UNLT:
9254 if (code == LT && TARGET_IEEE_FP)
9256 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9257 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9258 intcmp_mode = CCmode;
9259 code = EQ;
9261 else
9263 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9264 code = NE;
9266 break;
9267 case GE:
9268 case UNGE:
9269 if (code == GE || !TARGET_IEEE_FP)
9271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9272 code = EQ;
9274 else
9276 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9277 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9278 GEN_INT (0x01)));
9279 code = NE;
9281 break;
9282 case LE:
9283 case UNLE:
9284 if (code == LE && TARGET_IEEE_FP)
9286 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9287 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9288 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9289 intcmp_mode = CCmode;
9290 code = LTU;
9292 else
9294 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9295 code = NE;
9297 break;
9298 case EQ:
9299 case UNEQ:
9300 if (code == EQ && TARGET_IEEE_FP)
9302 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9303 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9304 intcmp_mode = CCmode;
9305 code = EQ;
9307 else
9309 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9310 code = NE;
9311 break;
9313 break;
9314 case NE:
9315 case LTGT:
9316 if (code == NE && TARGET_IEEE_FP)
9318 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9319 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9320 GEN_INT (0x40)));
9321 code = NE;
9323 else
9325 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9326 code = EQ;
9328 break;
9330 case UNORDERED:
9331 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9332 code = NE;
9333 break;
9334 case ORDERED:
9335 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9336 code = EQ;
9337 break;
9339 default:
9340 abort ();
9344 /* Return the test that should be put into the flags user, i.e.
9345 the bcc, scc, or cmov instruction. */
9346 return gen_rtx_fmt_ee (code, VOIDmode,
9347 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9348 const0_rtx);
9352 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9354 rtx op0, op1, ret;
9355 op0 = ix86_compare_op0;
9356 op1 = ix86_compare_op1;
9358 if (second_test)
9359 *second_test = NULL_RTX;
9360 if (bypass_test)
9361 *bypass_test = NULL_RTX;
9363 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9364 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9365 second_test, bypass_test);
9366 else
9367 ret = ix86_expand_int_compare (code, op0, op1);
9369 return ret;
9372 /* Return true if the CODE will result in nontrivial jump sequence. */
9373 bool
9374 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9376 enum rtx_code bypass_code, first_code, second_code;
9377 if (!TARGET_CMOVE)
9378 return true;
9379 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9380 return bypass_code != NIL || second_code != NIL;
9383 void
9384 ix86_expand_branch (enum rtx_code code, rtx label)
9386 rtx tmp;
9388 switch (GET_MODE (ix86_compare_op0))
9390 case QImode:
9391 case HImode:
9392 case SImode:
9393 simple:
9394 tmp = ix86_expand_compare (code, NULL, NULL);
9395 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9396 gen_rtx_LABEL_REF (VOIDmode, label),
9397 pc_rtx);
9398 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9399 return;
9401 case SFmode:
9402 case DFmode:
9403 case XFmode:
9405 rtvec vec;
9406 int use_fcomi;
9407 enum rtx_code bypass_code, first_code, second_code;
9409 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9410 &ix86_compare_op1);
9412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9414 /* Check whether we will use the natural sequence with one jump. If
9415 so, we can expand jump early. Otherwise delay expansion by
9416 creating compound insn to not confuse optimizers. */
9417 if (bypass_code == NIL && second_code == NIL
9418 && TARGET_CMOVE)
9420 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9421 gen_rtx_LABEL_REF (VOIDmode, label),
9422 pc_rtx, NULL_RTX);
9424 else
9426 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9427 ix86_compare_op0, ix86_compare_op1);
9428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9429 gen_rtx_LABEL_REF (VOIDmode, label),
9430 pc_rtx);
9431 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9433 use_fcomi = ix86_use_fcomi_compare (code);
9434 vec = rtvec_alloc (3 + !use_fcomi);
9435 RTVEC_ELT (vec, 0) = tmp;
9436 RTVEC_ELT (vec, 1)
9437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9438 RTVEC_ELT (vec, 2)
9439 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9440 if (! use_fcomi)
9441 RTVEC_ELT (vec, 3)
9442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9444 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9446 return;
9449 case DImode:
9450 if (TARGET_64BIT)
9451 goto simple;
9452 /* Expand DImode branch into multiple compare+branch. */
9454 rtx lo[2], hi[2], label2;
9455 enum rtx_code code1, code2, code3;
9457 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9459 tmp = ix86_compare_op0;
9460 ix86_compare_op0 = ix86_compare_op1;
9461 ix86_compare_op1 = tmp;
9462 code = swap_condition (code);
9464 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9465 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9468 avoid two branches. This costs one extra insn, so disable when
9469 optimizing for size. */
9471 if ((code == EQ || code == NE)
9472 && (!optimize_size
9473 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9475 rtx xor0, xor1;
9477 xor1 = hi[0];
9478 if (hi[1] != const0_rtx)
9479 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9480 NULL_RTX, 0, OPTAB_WIDEN);
9482 xor0 = lo[0];
9483 if (lo[1] != const0_rtx)
9484 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9485 NULL_RTX, 0, OPTAB_WIDEN);
9487 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9488 NULL_RTX, 0, OPTAB_WIDEN);
9490 ix86_compare_op0 = tmp;
9491 ix86_compare_op1 = const0_rtx;
9492 ix86_expand_branch (code, label);
9493 return;
9496 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9497 op1 is a constant and the low word is zero, then we can just
9498 examine the high word. */
9500 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9501 switch (code)
9503 case LT: case LTU: case GE: case GEU:
9504 ix86_compare_op0 = hi[0];
9505 ix86_compare_op1 = hi[1];
9506 ix86_expand_branch (code, label);
9507 return;
9508 default:
9509 break;
9512 /* Otherwise, we need two or three jumps. */
9514 label2 = gen_label_rtx ();
9516 code1 = code;
9517 code2 = swap_condition (code);
9518 code3 = unsigned_condition (code);
9520 switch (code)
9522 case LT: case GT: case LTU: case GTU:
9523 break;
9525 case LE: code1 = LT; code2 = GT; break;
9526 case GE: code1 = GT; code2 = LT; break;
9527 case LEU: code1 = LTU; code2 = GTU; break;
9528 case GEU: code1 = GTU; code2 = LTU; break;
9530 case EQ: code1 = NIL; code2 = NE; break;
9531 case NE: code2 = NIL; break;
9533 default:
9534 abort ();
9538 * a < b =>
9539 * if (hi(a) < hi(b)) goto true;
9540 * if (hi(a) > hi(b)) goto false;
9541 * if (lo(a) < lo(b)) goto true;
9542 * false:
9545 ix86_compare_op0 = hi[0];
9546 ix86_compare_op1 = hi[1];
9548 if (code1 != NIL)
9549 ix86_expand_branch (code1, label);
9550 if (code2 != NIL)
9551 ix86_expand_branch (code2, label2);
9553 ix86_compare_op0 = lo[0];
9554 ix86_compare_op1 = lo[1];
9555 ix86_expand_branch (code3, label);
9557 if (code2 != NIL)
9558 emit_label (label2);
9559 return;
9562 default:
9563 abort ();
9567 /* Split branch based on floating point condition. */
9568 void
9569 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9570 rtx target1, rtx target2, rtx tmp)
9572 rtx second, bypass;
9573 rtx label = NULL_RTX;
9574 rtx condition;
9575 int bypass_probability = -1, second_probability = -1, probability = -1;
9576 rtx i;
9578 if (target2 != pc_rtx)
9580 rtx tmp = target2;
9581 code = reverse_condition_maybe_unordered (code);
9582 target2 = target1;
9583 target1 = tmp;
9586 condition = ix86_expand_fp_compare (code, op1, op2,
9587 tmp, &second, &bypass);
9589 if (split_branch_probability >= 0)
9591 /* Distribute the probabilities across the jumps.
9592 Assume the BYPASS and SECOND to be always test
9593 for UNORDERED. */
9594 probability = split_branch_probability;
9596 /* Value of 1 is low enough to make no need for probability
9597 to be updated. Later we may run some experiments and see
9598 if unordered values are more frequent in practice. */
9599 if (bypass)
9600 bypass_probability = 1;
9601 if (second)
9602 second_probability = 1;
9604 if (bypass != NULL_RTX)
9606 label = gen_label_rtx ();
9607 i = emit_jump_insn (gen_rtx_SET
9608 (VOIDmode, pc_rtx,
9609 gen_rtx_IF_THEN_ELSE (VOIDmode,
9610 bypass,
9611 gen_rtx_LABEL_REF (VOIDmode,
9612 label),
9613 pc_rtx)));
9614 if (bypass_probability >= 0)
9615 REG_NOTES (i)
9616 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9617 GEN_INT (bypass_probability),
9618 REG_NOTES (i));
9620 i = emit_jump_insn (gen_rtx_SET
9621 (VOIDmode, pc_rtx,
9622 gen_rtx_IF_THEN_ELSE (VOIDmode,
9623 condition, target1, target2)));
9624 if (probability >= 0)
9625 REG_NOTES (i)
9626 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9627 GEN_INT (probability),
9628 REG_NOTES (i));
9629 if (second != NULL_RTX)
9631 i = emit_jump_insn (gen_rtx_SET
9632 (VOIDmode, pc_rtx,
9633 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9634 target2)));
9635 if (second_probability >= 0)
9636 REG_NOTES (i)
9637 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9638 GEN_INT (second_probability),
9639 REG_NOTES (i));
9641 if (label != NULL_RTX)
9642 emit_label (label);
9646 ix86_expand_setcc (enum rtx_code code, rtx dest)
9648 rtx ret, tmp, tmpreg, equiv;
9649 rtx second_test, bypass_test;
9651 if (GET_MODE (ix86_compare_op0) == DImode
9652 && !TARGET_64BIT)
9653 return 0; /* FAIL */
9655 if (GET_MODE (dest) != QImode)
9656 abort ();
9658 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9659 PUT_MODE (ret, QImode);
9661 tmp = dest;
9662 tmpreg = dest;
9664 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9665 if (bypass_test || second_test)
9667 rtx test = second_test;
9668 int bypass = 0;
9669 rtx tmp2 = gen_reg_rtx (QImode);
9670 if (bypass_test)
9672 if (second_test)
9673 abort ();
9674 test = bypass_test;
9675 bypass = 1;
9676 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9678 PUT_MODE (test, QImode);
9679 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9681 if (bypass)
9682 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9683 else
9684 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9687 /* Attach a REG_EQUAL note describing the comparison result. */
9688 equiv = simplify_gen_relational (code, QImode,
9689 GET_MODE (ix86_compare_op0),
9690 ix86_compare_op0, ix86_compare_op1);
9691 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9693 return 1; /* DONE */
9696 /* Expand comparison setting or clearing carry flag. Return true when
9697 successful and set pop for the operation. */
9698 static bool
9699 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9701 enum machine_mode mode =
9702 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9704 /* Do not handle DImode compares that go trought special path. Also we can't
9705 deal with FP compares yet. This is possible to add. */
9706 if ((mode == DImode && !TARGET_64BIT))
9707 return false;
9708 if (FLOAT_MODE_P (mode))
9710 rtx second_test = NULL, bypass_test = NULL;
9711 rtx compare_op, compare_seq;
9713 /* Shortcut: following common codes never translate into carry flag compares. */
9714 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9715 || code == ORDERED || code == UNORDERED)
9716 return false;
9718 /* These comparisons require zero flag; swap operands so they won't. */
9719 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9720 && !TARGET_IEEE_FP)
9722 rtx tmp = op0;
9723 op0 = op1;
9724 op1 = tmp;
9725 code = swap_condition (code);
9728 /* Try to expand the comparison and verify that we end up with carry flag
9729 based comparison. This is fails to be true only when we decide to expand
9730 comparison using arithmetic that is not too common scenario. */
9731 start_sequence ();
9732 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9733 &second_test, &bypass_test);
9734 compare_seq = get_insns ();
9735 end_sequence ();
9737 if (second_test || bypass_test)
9738 return false;
9739 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9740 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9741 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9742 else
9743 code = GET_CODE (compare_op);
9744 if (code != LTU && code != GEU)
9745 return false;
9746 emit_insn (compare_seq);
9747 *pop = compare_op;
9748 return true;
9750 if (!INTEGRAL_MODE_P (mode))
9751 return false;
9752 switch (code)
9754 case LTU:
9755 case GEU:
9756 break;
9758 /* Convert a==0 into (unsigned)a<1. */
9759 case EQ:
9760 case NE:
9761 if (op1 != const0_rtx)
9762 return false;
9763 op1 = const1_rtx;
9764 code = (code == EQ ? LTU : GEU);
9765 break;
9767 /* Convert a>b into b<a or a>=b-1. */
9768 case GTU:
9769 case LEU:
9770 if (GET_CODE (op1) == CONST_INT)
9772 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9773 /* Bail out on overflow. We still can swap operands but that
9774 would force loading of the constant into register. */
9775 if (op1 == const0_rtx
9776 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9777 return false;
9778 code = (code == GTU ? GEU : LTU);
9780 else
9782 rtx tmp = op1;
9783 op1 = op0;
9784 op0 = tmp;
9785 code = (code == GTU ? LTU : GEU);
9787 break;
9789 /* Convert a>=0 into (unsigned)a<0x80000000. */
9790 case LT:
9791 case GE:
9792 if (mode == DImode || op1 != const0_rtx)
9793 return false;
9794 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9795 code = (code == LT ? GEU : LTU);
9796 break;
9797 case LE:
9798 case GT:
9799 if (mode == DImode || op1 != constm1_rtx)
9800 return false;
9801 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9802 code = (code == LE ? GEU : LTU);
9803 break;
9805 default:
9806 return false;
9808 /* Swapping operands may cause constant to appear as first operand. */
9809 if (!nonimmediate_operand (op0, VOIDmode))
9811 if (no_new_pseudos)
9812 return false;
9813 op0 = force_reg (mode, op0);
9815 ix86_compare_op0 = op0;
9816 ix86_compare_op1 = op1;
9817 *pop = ix86_expand_compare (code, NULL, NULL);
9818 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9819 abort ();
9820 return true;
9824 ix86_expand_int_movcc (rtx operands[])
9826 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9827 rtx compare_seq, compare_op;
9828 rtx second_test, bypass_test;
9829 enum machine_mode mode = GET_MODE (operands[0]);
9830 bool sign_bit_compare_p = false;;
9832 start_sequence ();
9833 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9834 compare_seq = get_insns ();
9835 end_sequence ();
9837 compare_code = GET_CODE (compare_op);
9839 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9840 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9841 sign_bit_compare_p = true;
9843 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9844 HImode insns, we'd be swallowed in word prefix ops. */
9846 if ((mode != HImode || TARGET_FAST_PREFIX)
9847 && (mode != DImode || TARGET_64BIT)
9848 && GET_CODE (operands[2]) == CONST_INT
9849 && GET_CODE (operands[3]) == CONST_INT)
9851 rtx out = operands[0];
9852 HOST_WIDE_INT ct = INTVAL (operands[2]);
9853 HOST_WIDE_INT cf = INTVAL (operands[3]);
9854 HOST_WIDE_INT diff;
9856 diff = ct - cf;
9857 /* Sign bit compares are better done using shifts than we do by using
9858 sbb. */
9859 if (sign_bit_compare_p
9860 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9861 ix86_compare_op1, &compare_op))
9863 /* Detect overlap between destination and compare sources. */
9864 rtx tmp = out;
9866 if (!sign_bit_compare_p)
9868 bool fpcmp = false;
9870 compare_code = GET_CODE (compare_op);
9872 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9873 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9875 fpcmp = true;
9876 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9879 /* To simplify rest of code, restrict to the GEU case. */
9880 if (compare_code == LTU)
9882 HOST_WIDE_INT tmp = ct;
9883 ct = cf;
9884 cf = tmp;
9885 compare_code = reverse_condition (compare_code);
9886 code = reverse_condition (code);
9888 else
9890 if (fpcmp)
9891 PUT_CODE (compare_op,
9892 reverse_condition_maybe_unordered
9893 (GET_CODE (compare_op)));
9894 else
9895 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9897 diff = ct - cf;
9899 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9900 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9901 tmp = gen_reg_rtx (mode);
9903 if (mode == DImode)
9904 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9905 else
9906 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9908 else
9910 if (code == GT || code == GE)
9911 code = reverse_condition (code);
9912 else
9914 HOST_WIDE_INT tmp = ct;
9915 ct = cf;
9916 cf = tmp;
9917 diff = ct - cf;
9919 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9920 ix86_compare_op1, VOIDmode, 0, -1);
9923 if (diff == 1)
9926 * cmpl op0,op1
9927 * sbbl dest,dest
9928 * [addl dest, ct]
9930 * Size 5 - 8.
9932 if (ct)
9933 tmp = expand_simple_binop (mode, PLUS,
9934 tmp, GEN_INT (ct),
9935 copy_rtx (tmp), 1, OPTAB_DIRECT);
9937 else if (cf == -1)
9940 * cmpl op0,op1
9941 * sbbl dest,dest
9942 * orl $ct, dest
9944 * Size 8.
9946 tmp = expand_simple_binop (mode, IOR,
9947 tmp, GEN_INT (ct),
9948 copy_rtx (tmp), 1, OPTAB_DIRECT);
9950 else if (diff == -1 && ct)
9953 * cmpl op0,op1
9954 * sbbl dest,dest
9955 * notl dest
9956 * [addl dest, cf]
9958 * Size 8 - 11.
9960 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9961 if (cf)
9962 tmp = expand_simple_binop (mode, PLUS,
9963 copy_rtx (tmp), GEN_INT (cf),
9964 copy_rtx (tmp), 1, OPTAB_DIRECT);
9966 else
9969 * cmpl op0,op1
9970 * sbbl dest,dest
9971 * [notl dest]
9972 * andl cf - ct, dest
9973 * [addl dest, ct]
9975 * Size 8 - 11.
9978 if (cf == 0)
9980 cf = ct;
9981 ct = 0;
9982 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9985 tmp = expand_simple_binop (mode, AND,
9986 copy_rtx (tmp),
9987 gen_int_mode (cf - ct, mode),
9988 copy_rtx (tmp), 1, OPTAB_DIRECT);
9989 if (ct)
9990 tmp = expand_simple_binop (mode, PLUS,
9991 copy_rtx (tmp), GEN_INT (ct),
9992 copy_rtx (tmp), 1, OPTAB_DIRECT);
9995 if (!rtx_equal_p (tmp, out))
9996 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9998 return 1; /* DONE */
10001 if (diff < 0)
10003 HOST_WIDE_INT tmp;
10004 tmp = ct, ct = cf, cf = tmp;
10005 diff = -diff;
10006 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10008 /* We may be reversing unordered compare to normal compare, that
10009 is not valid in general (we may convert non-trapping condition
10010 to trapping one), however on i386 we currently emit all
10011 comparisons unordered. */
10012 compare_code = reverse_condition_maybe_unordered (compare_code);
10013 code = reverse_condition_maybe_unordered (code);
10015 else
10017 compare_code = reverse_condition (compare_code);
10018 code = reverse_condition (code);
10022 compare_code = NIL;
10023 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10024 && GET_CODE (ix86_compare_op1) == CONST_INT)
10026 if (ix86_compare_op1 == const0_rtx
10027 && (code == LT || code == GE))
10028 compare_code = code;
10029 else if (ix86_compare_op1 == constm1_rtx)
10031 if (code == LE)
10032 compare_code = LT;
10033 else if (code == GT)
10034 compare_code = GE;
10038 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10039 if (compare_code != NIL
10040 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10041 && (cf == -1 || ct == -1))
10043 /* If lea code below could be used, only optimize
10044 if it results in a 2 insn sequence. */
10046 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10047 || diff == 3 || diff == 5 || diff == 9)
10048 || (compare_code == LT && ct == -1)
10049 || (compare_code == GE && cf == -1))
10052 * notl op1 (if necessary)
10053 * sarl $31, op1
10054 * orl cf, op1
10056 if (ct != -1)
10058 cf = ct;
10059 ct = -1;
10060 code = reverse_condition (code);
10063 out = emit_store_flag (out, code, ix86_compare_op0,
10064 ix86_compare_op1, VOIDmode, 0, -1);
10066 out = expand_simple_binop (mode, IOR,
10067 out, GEN_INT (cf),
10068 out, 1, OPTAB_DIRECT);
10069 if (out != operands[0])
10070 emit_move_insn (operands[0], out);
10072 return 1; /* DONE */
10077 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10078 || diff == 3 || diff == 5 || diff == 9)
10079 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10080 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10083 * xorl dest,dest
10084 * cmpl op1,op2
10085 * setcc dest
10086 * lea cf(dest*(ct-cf)),dest
10088 * Size 14.
10090 * This also catches the degenerate setcc-only case.
10093 rtx tmp;
10094 int nops;
10096 out = emit_store_flag (out, code, ix86_compare_op0,
10097 ix86_compare_op1, VOIDmode, 0, 1);
10099 nops = 0;
10100 /* On x86_64 the lea instruction operates on Pmode, so we need
10101 to get arithmetics done in proper mode to match. */
10102 if (diff == 1)
10103 tmp = copy_rtx (out);
10104 else
10106 rtx out1;
10107 out1 = copy_rtx (out);
10108 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10109 nops++;
10110 if (diff & 1)
10112 tmp = gen_rtx_PLUS (mode, tmp, out1);
10113 nops++;
10116 if (cf != 0)
10118 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10119 nops++;
10121 if (!rtx_equal_p (tmp, out))
10123 if (nops == 1)
10124 out = force_operand (tmp, copy_rtx (out));
10125 else
10126 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10128 if (!rtx_equal_p (out, operands[0]))
10129 emit_move_insn (operands[0], copy_rtx (out));
10131 return 1; /* DONE */
10135 * General case: Jumpful:
10136 * xorl dest,dest cmpl op1, op2
10137 * cmpl op1, op2 movl ct, dest
10138 * setcc dest jcc 1f
10139 * decl dest movl cf, dest
10140 * andl (cf-ct),dest 1:
10141 * addl ct,dest
10143 * Size 20. Size 14.
10145 * This is reasonably steep, but branch mispredict costs are
10146 * high on modern cpus, so consider failing only if optimizing
10147 * for space.
10150 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10151 && BRANCH_COST >= 2)
10153 if (cf == 0)
10155 cf = ct;
10156 ct = 0;
10157 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10158 /* We may be reversing unordered compare to normal compare,
10159 that is not valid in general (we may convert non-trapping
10160 condition to trapping one), however on i386 we currently
10161 emit all comparisons unordered. */
10162 code = reverse_condition_maybe_unordered (code);
10163 else
10165 code = reverse_condition (code);
10166 if (compare_code != NIL)
10167 compare_code = reverse_condition (compare_code);
10171 if (compare_code != NIL)
10173 /* notl op1 (if needed)
10174 sarl $31, op1
10175 andl (cf-ct), op1
10176 addl ct, op1
10178 For x < 0 (resp. x <= -1) there will be no notl,
10179 so if possible swap the constants to get rid of the
10180 complement.
10181 True/false will be -1/0 while code below (store flag
10182 followed by decrement) is 0/-1, so the constants need
10183 to be exchanged once more. */
10185 if (compare_code == GE || !cf)
10187 code = reverse_condition (code);
10188 compare_code = LT;
10190 else
10192 HOST_WIDE_INT tmp = cf;
10193 cf = ct;
10194 ct = tmp;
10197 out = emit_store_flag (out, code, ix86_compare_op0,
10198 ix86_compare_op1, VOIDmode, 0, -1);
10200 else
10202 out = emit_store_flag (out, code, ix86_compare_op0,
10203 ix86_compare_op1, VOIDmode, 0, 1);
10205 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10206 copy_rtx (out), 1, OPTAB_DIRECT);
10209 out = expand_simple_binop (mode, AND, copy_rtx (out),
10210 gen_int_mode (cf - ct, mode),
10211 copy_rtx (out), 1, OPTAB_DIRECT);
10212 if (ct)
10213 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10214 copy_rtx (out), 1, OPTAB_DIRECT);
10215 if (!rtx_equal_p (out, operands[0]))
10216 emit_move_insn (operands[0], copy_rtx (out));
10218 return 1; /* DONE */
10222 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10224 /* Try a few things more with specific constants and a variable. */
10226 optab op;
10227 rtx var, orig_out, out, tmp;
10229 if (BRANCH_COST <= 2)
10230 return 0; /* FAIL */
10232 /* If one of the two operands is an interesting constant, load a
10233 constant with the above and mask it in with a logical operation. */
10235 if (GET_CODE (operands[2]) == CONST_INT)
10237 var = operands[3];
10238 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10239 operands[3] = constm1_rtx, op = and_optab;
10240 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10241 operands[3] = const0_rtx, op = ior_optab;
10242 else
10243 return 0; /* FAIL */
10245 else if (GET_CODE (operands[3]) == CONST_INT)
10247 var = operands[2];
10248 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10249 operands[2] = constm1_rtx, op = and_optab;
10250 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10251 operands[2] = const0_rtx, op = ior_optab;
10252 else
10253 return 0; /* FAIL */
10255 else
10256 return 0; /* FAIL */
10258 orig_out = operands[0];
10259 tmp = gen_reg_rtx (mode);
10260 operands[0] = tmp;
10262 /* Recurse to get the constant loaded. */
10263 if (ix86_expand_int_movcc (operands) == 0)
10264 return 0; /* FAIL */
10266 /* Mask in the interesting variable. */
10267 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10268 OPTAB_WIDEN);
10269 if (!rtx_equal_p (out, orig_out))
10270 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10272 return 1; /* DONE */
10276 * For comparison with above,
10278 * movl cf,dest
10279 * movl ct,tmp
10280 * cmpl op1,op2
10281 * cmovcc tmp,dest
10283 * Size 15.
10286 if (! nonimmediate_operand (operands[2], mode))
10287 operands[2] = force_reg (mode, operands[2]);
10288 if (! nonimmediate_operand (operands[3], mode))
10289 operands[3] = force_reg (mode, operands[3]);
10291 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10293 rtx tmp = gen_reg_rtx (mode);
10294 emit_move_insn (tmp, operands[3]);
10295 operands[3] = tmp;
10297 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10299 rtx tmp = gen_reg_rtx (mode);
10300 emit_move_insn (tmp, operands[2]);
10301 operands[2] = tmp;
10304 if (! register_operand (operands[2], VOIDmode)
10305 && (mode == QImode
10306 || ! register_operand (operands[3], VOIDmode)))
10307 operands[2] = force_reg (mode, operands[2]);
10309 if (mode == QImode
10310 && ! register_operand (operands[3], VOIDmode))
10311 operands[3] = force_reg (mode, operands[3]);
10313 emit_insn (compare_seq);
10314 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10315 gen_rtx_IF_THEN_ELSE (mode,
10316 compare_op, operands[2],
10317 operands[3])));
10318 if (bypass_test)
10319 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10320 gen_rtx_IF_THEN_ELSE (mode,
10321 bypass_test,
10322 copy_rtx (operands[3]),
10323 copy_rtx (operands[0]))));
10324 if (second_test)
10325 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10326 gen_rtx_IF_THEN_ELSE (mode,
10327 second_test,
10328 copy_rtx (operands[2]),
10329 copy_rtx (operands[0]))));
10331 return 1; /* DONE */
10335 ix86_expand_fp_movcc (rtx operands[])
10337 enum rtx_code code;
10338 rtx tmp;
10339 rtx compare_op, second_test, bypass_test;
10341 /* For SF/DFmode conditional moves based on comparisons
10342 in same mode, we may want to use SSE min/max instructions. */
10343 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10344 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10345 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10346 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10347 && (!TARGET_IEEE_FP
10348 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10349 /* We may be called from the post-reload splitter. */
10350 && (!REG_P (operands[0])
10351 || SSE_REG_P (operands[0])
10352 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10354 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10355 code = GET_CODE (operands[1]);
10357 /* See if we have (cross) match between comparison operands and
10358 conditional move operands. */
10359 if (rtx_equal_p (operands[2], op1))
10361 rtx tmp = op0;
10362 op0 = op1;
10363 op1 = tmp;
10364 code = reverse_condition_maybe_unordered (code);
10366 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10368 /* Check for min operation. */
10369 if (code == LT || code == UNLE)
10371 if (code == UNLE)
10373 rtx tmp = op0;
10374 op0 = op1;
10375 op1 = tmp;
10377 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10378 if (memory_operand (op0, VOIDmode))
10379 op0 = force_reg (GET_MODE (operands[0]), op0);
10380 if (GET_MODE (operands[0]) == SFmode)
10381 emit_insn (gen_minsf3 (operands[0], op0, op1));
10382 else
10383 emit_insn (gen_mindf3 (operands[0], op0, op1));
10384 return 1;
10386 /* Check for max operation. */
10387 if (code == GT || code == UNGE)
10389 if (code == UNGE)
10391 rtx tmp = op0;
10392 op0 = op1;
10393 op1 = tmp;
10395 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10396 if (memory_operand (op0, VOIDmode))
10397 op0 = force_reg (GET_MODE (operands[0]), op0);
10398 if (GET_MODE (operands[0]) == SFmode)
10399 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10400 else
10401 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10402 return 1;
10405 /* Manage condition to be sse_comparison_operator. In case we are
10406 in non-ieee mode, try to canonicalize the destination operand
10407 to be first in the comparison - this helps reload to avoid extra
10408 moves. */
10409 if (!sse_comparison_operator (operands[1], VOIDmode)
10410 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10412 rtx tmp = ix86_compare_op0;
10413 ix86_compare_op0 = ix86_compare_op1;
10414 ix86_compare_op1 = tmp;
10415 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10416 VOIDmode, ix86_compare_op0,
10417 ix86_compare_op1);
10419 /* Similarly try to manage result to be first operand of conditional
10420 move. We also don't support the NE comparison on SSE, so try to
10421 avoid it. */
10422 if ((rtx_equal_p (operands[0], operands[3])
10423 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10424 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10426 rtx tmp = operands[2];
10427 operands[2] = operands[3];
10428 operands[3] = tmp;
10429 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10430 (GET_CODE (operands[1])),
10431 VOIDmode, ix86_compare_op0,
10432 ix86_compare_op1);
10434 if (GET_MODE (operands[0]) == SFmode)
10435 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10436 operands[2], operands[3],
10437 ix86_compare_op0, ix86_compare_op1));
10438 else
10439 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10440 operands[2], operands[3],
10441 ix86_compare_op0, ix86_compare_op1));
10442 return 1;
10445 /* The floating point conditional move instructions don't directly
10446 support conditions resulting from a signed integer comparison. */
10448 code = GET_CODE (operands[1]);
10449 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10451 /* The floating point conditional move instructions don't directly
10452 support signed integer comparisons. */
10454 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10456 if (second_test != NULL || bypass_test != NULL)
10457 abort ();
10458 tmp = gen_reg_rtx (QImode);
10459 ix86_expand_setcc (code, tmp);
10460 code = NE;
10461 ix86_compare_op0 = tmp;
10462 ix86_compare_op1 = const0_rtx;
10463 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10465 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10467 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10468 emit_move_insn (tmp, operands[3]);
10469 operands[3] = tmp;
10471 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10473 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10474 emit_move_insn (tmp, operands[2]);
10475 operands[2] = tmp;
10478 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10479 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10480 compare_op,
10481 operands[2],
10482 operands[3])));
10483 if (bypass_test)
10484 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10485 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10486 bypass_test,
10487 operands[3],
10488 operands[0])));
10489 if (second_test)
10490 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10491 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10492 second_test,
10493 operands[2],
10494 operands[0])));
10496 return 1;
10499 /* Expand conditional increment or decrement using adb/sbb instructions.
10500 The default case using setcc followed by the conditional move can be
10501 done by generic code. */
10503 ix86_expand_int_addcc (rtx operands[])
10505 enum rtx_code code = GET_CODE (operands[1]);
10506 rtx compare_op;
10507 rtx val = const0_rtx;
10508 bool fpcmp = false;
10509 enum machine_mode mode = GET_MODE (operands[0]);
10511 if (operands[3] != const1_rtx
10512 && operands[3] != constm1_rtx)
10513 return 0;
10514 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10515 ix86_compare_op1, &compare_op))
10516 return 0;
10517 code = GET_CODE (compare_op);
10519 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10520 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10522 fpcmp = true;
10523 code = ix86_fp_compare_code_to_integer (code);
10526 if (code != LTU)
10528 val = constm1_rtx;
10529 if (fpcmp)
10530 PUT_CODE (compare_op,
10531 reverse_condition_maybe_unordered
10532 (GET_CODE (compare_op)));
10533 else
10534 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10536 PUT_MODE (compare_op, mode);
10538 /* Construct either adc or sbb insn. */
10539 if ((code == LTU) == (operands[3] == constm1_rtx))
10541 switch (GET_MODE (operands[0]))
10543 case QImode:
10544 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10545 break;
10546 case HImode:
10547 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10548 break;
10549 case SImode:
10550 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10551 break;
10552 case DImode:
10553 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10554 break;
10555 default:
10556 abort ();
10559 else
10561 switch (GET_MODE (operands[0]))
10563 case QImode:
10564 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10565 break;
10566 case HImode:
10567 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10568 break;
10569 case SImode:
10570 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10571 break;
10572 case DImode:
10573 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10574 break;
10575 default:
10576 abort ();
10579 return 1; /* DONE */
10583 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10584 works for floating pointer parameters and nonoffsetable memories.
10585 For pushes, it returns just stack offsets; the values will be saved
10586 in the right order. Maximally three parts are generated. */
10588 static int
10589 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10591 int size;
10593 if (!TARGET_64BIT)
10594 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10595 else
10596 size = (GET_MODE_SIZE (mode) + 4) / 8;
10598 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10599 abort ();
10600 if (size < 2 || size > 3)
10601 abort ();
10603 /* Optimize constant pool reference to immediates. This is used by fp
10604 moves, that force all constants to memory to allow combining. */
10605 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10607 rtx tmp = maybe_get_pool_constant (operand);
10608 if (tmp)
10609 operand = tmp;
10612 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10614 /* The only non-offsetable memories we handle are pushes. */
10615 if (! push_operand (operand, VOIDmode))
10616 abort ();
10618 operand = copy_rtx (operand);
10619 PUT_MODE (operand, Pmode);
10620 parts[0] = parts[1] = parts[2] = operand;
10622 else if (!TARGET_64BIT)
10624 if (mode == DImode)
10625 split_di (&operand, 1, &parts[0], &parts[1]);
10626 else
10628 if (REG_P (operand))
10630 if (!reload_completed)
10631 abort ();
10632 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10633 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10634 if (size == 3)
10635 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10637 else if (offsettable_memref_p (operand))
10639 operand = adjust_address (operand, SImode, 0);
10640 parts[0] = operand;
10641 parts[1] = adjust_address (operand, SImode, 4);
10642 if (size == 3)
10643 parts[2] = adjust_address (operand, SImode, 8);
10645 else if (GET_CODE (operand) == CONST_DOUBLE)
10647 REAL_VALUE_TYPE r;
10648 long l[4];
10650 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10651 switch (mode)
10653 case XFmode:
10654 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10655 parts[2] = gen_int_mode (l[2], SImode);
10656 break;
10657 case DFmode:
10658 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10659 break;
10660 default:
10661 abort ();
10663 parts[1] = gen_int_mode (l[1], SImode);
10664 parts[0] = gen_int_mode (l[0], SImode);
10666 else
10667 abort ();
10670 else
10672 if (mode == TImode)
10673 split_ti (&operand, 1, &parts[0], &parts[1]);
10674 if (mode == XFmode || mode == TFmode)
10676 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10677 if (REG_P (operand))
10679 if (!reload_completed)
10680 abort ();
10681 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10682 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10684 else if (offsettable_memref_p (operand))
10686 operand = adjust_address (operand, DImode, 0);
10687 parts[0] = operand;
10688 parts[1] = adjust_address (operand, upper_mode, 8);
10690 else if (GET_CODE (operand) == CONST_DOUBLE)
10692 REAL_VALUE_TYPE r;
10693 long l[3];
10695 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10696 real_to_target (l, &r, mode);
10697 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10698 if (HOST_BITS_PER_WIDE_INT >= 64)
10699 parts[0]
10700 = gen_int_mode
10701 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10702 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10703 DImode);
10704 else
10705 parts[0] = immed_double_const (l[0], l[1], DImode);
10706 if (upper_mode == SImode)
10707 parts[1] = gen_int_mode (l[2], SImode);
10708 else if (HOST_BITS_PER_WIDE_INT >= 64)
10709 parts[1]
10710 = gen_int_mode
10711 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10712 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10713 DImode);
10714 else
10715 parts[1] = immed_double_const (l[2], l[3], DImode);
10717 else
10718 abort ();
10722 return size;
10725 /* Emit insns to perform a move or push of DI, DF, and XF values.
10726 Return false when normal moves are needed; true when all required
10727 insns have been emitted. Operands 2-4 contain the input values
10728 int the correct order; operands 5-7 contain the output values. */
10730 void
10731 ix86_split_long_move (rtx operands[])
10733 rtx part[2][3];
10734 int nparts;
10735 int push = 0;
10736 int collisions = 0;
10737 enum machine_mode mode = GET_MODE (operands[0]);
10739 /* The DFmode expanders may ask us to move double.
10740 For 64bit target this is single move. By hiding the fact
10741 here we simplify i386.md splitters. */
10742 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10744 /* Optimize constant pool reference to immediates. This is used by
10745 fp moves, that force all constants to memory to allow combining. */
10747 if (GET_CODE (operands[1]) == MEM
10748 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10749 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10750 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10751 if (push_operand (operands[0], VOIDmode))
10753 operands[0] = copy_rtx (operands[0]);
10754 PUT_MODE (operands[0], Pmode);
10756 else
10757 operands[0] = gen_lowpart (DImode, operands[0]);
10758 operands[1] = gen_lowpart (DImode, operands[1]);
10759 emit_move_insn (operands[0], operands[1]);
10760 return;
10763 /* The only non-offsettable memory we handle is push. */
10764 if (push_operand (operands[0], VOIDmode))
10765 push = 1;
10766 else if (GET_CODE (operands[0]) == MEM
10767 && ! offsettable_memref_p (operands[0]))
10768 abort ();
10770 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10771 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10773 /* When emitting push, take care for source operands on the stack. */
10774 if (push && GET_CODE (operands[1]) == MEM
10775 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10777 if (nparts == 3)
10778 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10779 XEXP (part[1][2], 0));
10780 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10781 XEXP (part[1][1], 0));
10784 /* We need to do copy in the right order in case an address register
10785 of the source overlaps the destination. */
10786 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10788 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10789 collisions++;
10790 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10791 collisions++;
10792 if (nparts == 3
10793 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10794 collisions++;
10796 /* Collision in the middle part can be handled by reordering. */
10797 if (collisions == 1 && nparts == 3
10798 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10800 rtx tmp;
10801 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10802 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10805 /* If there are more collisions, we can't handle it by reordering.
10806 Do an lea to the last part and use only one colliding move. */
10807 else if (collisions > 1)
10809 rtx base;
10811 collisions = 1;
10813 base = part[0][nparts - 1];
10815 /* Handle the case when the last part isn't valid for lea.
10816 Happens in 64-bit mode storing the 12-byte XFmode. */
10817 if (GET_MODE (base) != Pmode)
10818 base = gen_rtx_REG (Pmode, REGNO (base));
10820 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10821 part[1][0] = replace_equiv_address (part[1][0], base);
10822 part[1][1] = replace_equiv_address (part[1][1],
10823 plus_constant (base, UNITS_PER_WORD));
10824 if (nparts == 3)
10825 part[1][2] = replace_equiv_address (part[1][2],
10826 plus_constant (base, 8));
10830 if (push)
10832 if (!TARGET_64BIT)
10834 if (nparts == 3)
10836 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10837 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10838 emit_move_insn (part[0][2], part[1][2]);
10841 else
10843 /* In 64bit mode we don't have 32bit push available. In case this is
10844 register, it is OK - we will just use larger counterpart. We also
10845 retype memory - these comes from attempt to avoid REX prefix on
10846 moving of second half of TFmode value. */
10847 if (GET_MODE (part[1][1]) == SImode)
10849 if (GET_CODE (part[1][1]) == MEM)
10850 part[1][1] = adjust_address (part[1][1], DImode, 0);
10851 else if (REG_P (part[1][1]))
10852 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10853 else
10854 abort ();
10855 if (GET_MODE (part[1][0]) == SImode)
10856 part[1][0] = part[1][1];
10859 emit_move_insn (part[0][1], part[1][1]);
10860 emit_move_insn (part[0][0], part[1][0]);
10861 return;
10864 /* Choose correct order to not overwrite the source before it is copied. */
10865 if ((REG_P (part[0][0])
10866 && REG_P (part[1][1])
10867 && (REGNO (part[0][0]) == REGNO (part[1][1])
10868 || (nparts == 3
10869 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10870 || (collisions > 0
10871 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10873 if (nparts == 3)
10875 operands[2] = part[0][2];
10876 operands[3] = part[0][1];
10877 operands[4] = part[0][0];
10878 operands[5] = part[1][2];
10879 operands[6] = part[1][1];
10880 operands[7] = part[1][0];
10882 else
10884 operands[2] = part[0][1];
10885 operands[3] = part[0][0];
10886 operands[5] = part[1][1];
10887 operands[6] = part[1][0];
10890 else
10892 if (nparts == 3)
10894 operands[2] = part[0][0];
10895 operands[3] = part[0][1];
10896 operands[4] = part[0][2];
10897 operands[5] = part[1][0];
10898 operands[6] = part[1][1];
10899 operands[7] = part[1][2];
10901 else
10903 operands[2] = part[0][0];
10904 operands[3] = part[0][1];
10905 operands[5] = part[1][0];
10906 operands[6] = part[1][1];
10909 emit_move_insn (operands[2], operands[5]);
10910 emit_move_insn (operands[3], operands[6]);
10911 if (nparts == 3)
10912 emit_move_insn (operands[4], operands[7]);
10914 return;
10917 void
10918 ix86_split_ashldi (rtx *operands, rtx scratch)
10920 rtx low[2], high[2];
10921 int count;
10923 if (GET_CODE (operands[2]) == CONST_INT)
10925 split_di (operands, 2, low, high);
10926 count = INTVAL (operands[2]) & 63;
10928 if (count >= 32)
10930 emit_move_insn (high[0], low[1]);
10931 emit_move_insn (low[0], const0_rtx);
10933 if (count > 32)
10934 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10936 else
10938 if (!rtx_equal_p (operands[0], operands[1]))
10939 emit_move_insn (operands[0], operands[1]);
10940 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10941 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10944 else
10946 if (!rtx_equal_p (operands[0], operands[1]))
10947 emit_move_insn (operands[0], operands[1]);
10949 split_di (operands, 1, low, high);
10951 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10952 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10954 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10956 if (! no_new_pseudos)
10957 scratch = force_reg (SImode, const0_rtx);
10958 else
10959 emit_move_insn (scratch, const0_rtx);
10961 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10962 scratch));
10964 else
10965 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10969 void
10970 ix86_split_ashrdi (rtx *operands, rtx scratch)
10972 rtx low[2], high[2];
10973 int count;
10975 if (GET_CODE (operands[2]) == CONST_INT)
10977 split_di (operands, 2, low, high);
10978 count = INTVAL (operands[2]) & 63;
10980 if (count == 63)
10982 emit_move_insn (high[0], high[1]);
10983 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10984 emit_move_insn (low[0], high[0]);
10987 else if (count >= 32)
10989 emit_move_insn (low[0], high[1]);
10991 if (! reload_completed)
10992 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10993 else
10995 emit_move_insn (high[0], low[0]);
10996 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10999 if (count > 32)
11000 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11002 else
11004 if (!rtx_equal_p (operands[0], operands[1]))
11005 emit_move_insn (operands[0], operands[1]);
11006 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11007 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11010 else
11012 if (!rtx_equal_p (operands[0], operands[1]))
11013 emit_move_insn (operands[0], operands[1]);
11015 split_di (operands, 1, low, high);
11017 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11018 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11020 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11022 if (! no_new_pseudos)
11023 scratch = gen_reg_rtx (SImode);
11024 emit_move_insn (scratch, high[0]);
11025 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11026 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11027 scratch));
11029 else
11030 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11034 void
11035 ix86_split_lshrdi (rtx *operands, rtx scratch)
11037 rtx low[2], high[2];
11038 int count;
11040 if (GET_CODE (operands[2]) == CONST_INT)
11042 split_di (operands, 2, low, high);
11043 count = INTVAL (operands[2]) & 63;
11045 if (count >= 32)
11047 emit_move_insn (low[0], high[1]);
11048 emit_move_insn (high[0], const0_rtx);
11050 if (count > 32)
11051 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11053 else
11055 if (!rtx_equal_p (operands[0], operands[1]))
11056 emit_move_insn (operands[0], operands[1]);
11057 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11058 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11061 else
11063 if (!rtx_equal_p (operands[0], operands[1]))
11064 emit_move_insn (operands[0], operands[1]);
11066 split_di (operands, 1, low, high);
11068 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11069 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11071 /* Heh. By reversing the arguments, we can reuse this pattern. */
11072 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11074 if (! no_new_pseudos)
11075 scratch = force_reg (SImode, const0_rtx);
11076 else
11077 emit_move_insn (scratch, const0_rtx);
11079 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11080 scratch));
11082 else
11083 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11087 /* Helper function for the string operations below. Dest VARIABLE whether
11088 it is aligned to VALUE bytes. If true, jump to the label. */
11089 static rtx
11090 ix86_expand_aligntest (rtx variable, int value)
11092 rtx label = gen_label_rtx ();
11093 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11094 if (GET_MODE (variable) == DImode)
11095 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11096 else
11097 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11098 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11099 1, label);
11100 return label;
11103 /* Adjust COUNTER by the VALUE. */
11104 static void
11105 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11107 if (GET_MODE (countreg) == DImode)
11108 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11109 else
11110 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11113 /* Zero extend possibly SImode EXP to Pmode register. */
11115 ix86_zero_extend_to_Pmode (rtx exp)
11117 rtx r;
11118 if (GET_MODE (exp) == VOIDmode)
11119 return force_reg (Pmode, exp);
11120 if (GET_MODE (exp) == Pmode)
11121 return copy_to_mode_reg (Pmode, exp);
11122 r = gen_reg_rtx (Pmode);
11123 emit_insn (gen_zero_extendsidi2 (r, exp));
11124 return r;
11127 /* Expand string move (memcpy) operation. Use i386 string operations when
11128 profitable. expand_clrmem contains similar code. */
11130 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11132 rtx srcreg, destreg, countreg, srcexp, destexp;
11133 enum machine_mode counter_mode;
11134 HOST_WIDE_INT align = 0;
11135 unsigned HOST_WIDE_INT count = 0;
11137 if (GET_CODE (align_exp) == CONST_INT)
11138 align = INTVAL (align_exp);
11140 /* Can't use any of this if the user has appropriated esi or edi. */
11141 if (global_regs[4] || global_regs[5])
11142 return 0;
11144 /* This simple hack avoids all inlining code and simplifies code below. */
11145 if (!TARGET_ALIGN_STRINGOPS)
11146 align = 64;
11148 if (GET_CODE (count_exp) == CONST_INT)
11150 count = INTVAL (count_exp);
11151 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11152 return 0;
11155 /* Figure out proper mode for counter. For 32bits it is always SImode,
11156 for 64bits use SImode when possible, otherwise DImode.
11157 Set count to number of bytes copied when known at compile time. */
11158 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11159 || x86_64_zero_extended_value (count_exp))
11160 counter_mode = SImode;
11161 else
11162 counter_mode = DImode;
11164 if (counter_mode != SImode && counter_mode != DImode)
11165 abort ();
11167 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11168 if (destreg != XEXP (dst, 0))
11169 dst = replace_equiv_address_nv (dst, destreg);
11170 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11171 if (srcreg != XEXP (src, 0))
11172 src = replace_equiv_address_nv (src, srcreg);
11174 /* When optimizing for size emit simple rep ; movsb instruction for
11175 counts not divisible by 4. */
11177 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11179 emit_insn (gen_cld ());
11180 countreg = ix86_zero_extend_to_Pmode (count_exp);
11181 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11182 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11183 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11184 destexp, srcexp));
11187 /* For constant aligned (or small unaligned) copies use rep movsl
11188 followed by code copying the rest. For PentiumPro ensure 8 byte
11189 alignment to allow rep movsl acceleration. */
11191 else if (count != 0
11192 && (align >= 8
11193 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11194 || optimize_size || count < (unsigned int) 64))
11196 unsigned HOST_WIDE_INT offset = 0;
11197 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11198 rtx srcmem, dstmem;
11200 emit_insn (gen_cld ());
11201 if (count & ~(size - 1))
11203 countreg = copy_to_mode_reg (counter_mode,
11204 GEN_INT ((count >> (size == 4 ? 2 : 3))
11205 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11206 countreg = ix86_zero_extend_to_Pmode (countreg);
11208 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11209 GEN_INT (size == 4 ? 2 : 3));
11210 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11211 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11213 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11214 countreg, destexp, srcexp));
11215 offset = count & ~(size - 1);
11217 if (size == 8 && (count & 0x04))
11219 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11220 offset);
11221 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11222 offset);
11223 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11224 offset += 4;
11226 if (count & 0x02)
11228 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11229 offset);
11230 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11231 offset);
11232 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11233 offset += 2;
11235 if (count & 0x01)
11237 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11238 offset);
11239 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11240 offset);
11241 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11244 /* The generic code based on the glibc implementation:
11245 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11246 allowing accelerated copying there)
11247 - copy the data using rep movsl
11248 - copy the rest. */
11249 else
11251 rtx countreg2;
11252 rtx label = NULL;
11253 rtx srcmem, dstmem;
11254 int desired_alignment = (TARGET_PENTIUMPRO
11255 && (count == 0 || count >= (unsigned int) 260)
11256 ? 8 : UNITS_PER_WORD);
11257 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11258 dst = change_address (dst, BLKmode, destreg);
11259 src = change_address (src, BLKmode, srcreg);
11261 /* In case we don't know anything about the alignment, default to
11262 library version, since it is usually equally fast and result in
11263 shorter code.
11265 Also emit call when we know that the count is large and call overhead
11266 will not be important. */
11267 if (!TARGET_INLINE_ALL_STRINGOPS
11268 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11269 return 0;
11271 if (TARGET_SINGLE_STRINGOP)
11272 emit_insn (gen_cld ());
11274 countreg2 = gen_reg_rtx (Pmode);
11275 countreg = copy_to_mode_reg (counter_mode, count_exp);
11277 /* We don't use loops to align destination and to copy parts smaller
11278 than 4 bytes, because gcc is able to optimize such code better (in
11279 the case the destination or the count really is aligned, gcc is often
11280 able to predict the branches) and also it is friendlier to the
11281 hardware branch prediction.
11283 Using loops is beneficial for generic case, because we can
11284 handle small counts using the loops. Many CPUs (such as Athlon)
11285 have large REP prefix setup costs.
11287 This is quite costly. Maybe we can revisit this decision later or
11288 add some customizability to this code. */
11290 if (count == 0 && align < desired_alignment)
11292 label = gen_label_rtx ();
11293 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11294 LEU, 0, counter_mode, 1, label);
11296 if (align <= 1)
11298 rtx label = ix86_expand_aligntest (destreg, 1);
11299 srcmem = change_address (src, QImode, srcreg);
11300 dstmem = change_address (dst, QImode, destreg);
11301 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11302 ix86_adjust_counter (countreg, 1);
11303 emit_label (label);
11304 LABEL_NUSES (label) = 1;
11306 if (align <= 2)
11308 rtx label = ix86_expand_aligntest (destreg, 2);
11309 srcmem = change_address (src, HImode, srcreg);
11310 dstmem = change_address (dst, HImode, destreg);
11311 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11312 ix86_adjust_counter (countreg, 2);
11313 emit_label (label);
11314 LABEL_NUSES (label) = 1;
11316 if (align <= 4 && desired_alignment > 4)
11318 rtx label = ix86_expand_aligntest (destreg, 4);
11319 srcmem = change_address (src, SImode, srcreg);
11320 dstmem = change_address (dst, SImode, destreg);
11321 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11322 ix86_adjust_counter (countreg, 4);
11323 emit_label (label);
11324 LABEL_NUSES (label) = 1;
11327 if (label && desired_alignment > 4 && !TARGET_64BIT)
11329 emit_label (label);
11330 LABEL_NUSES (label) = 1;
11331 label = NULL_RTX;
11333 if (!TARGET_SINGLE_STRINGOP)
11334 emit_insn (gen_cld ());
11335 if (TARGET_64BIT)
11337 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11338 GEN_INT (3)));
11339 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11341 else
11343 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11344 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11346 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11347 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11348 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11349 countreg2, destexp, srcexp));
11351 if (label)
11353 emit_label (label);
11354 LABEL_NUSES (label) = 1;
11356 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11358 srcmem = change_address (src, SImode, srcreg);
11359 dstmem = change_address (dst, SImode, destreg);
11360 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11362 if ((align <= 4 || count == 0) && TARGET_64BIT)
11364 rtx label = ix86_expand_aligntest (countreg, 4);
11365 srcmem = change_address (src, SImode, srcreg);
11366 dstmem = change_address (dst, SImode, destreg);
11367 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11368 emit_label (label);
11369 LABEL_NUSES (label) = 1;
11371 if (align > 2 && count != 0 && (count & 2))
11373 srcmem = change_address (src, HImode, srcreg);
11374 dstmem = change_address (dst, HImode, destreg);
11375 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11377 if (align <= 2 || count == 0)
11379 rtx label = ix86_expand_aligntest (countreg, 2);
11380 srcmem = change_address (src, HImode, srcreg);
11381 dstmem = change_address (dst, HImode, destreg);
11382 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11383 emit_label (label);
11384 LABEL_NUSES (label) = 1;
11386 if (align > 1 && count != 0 && (count & 1))
11388 srcmem = change_address (src, QImode, srcreg);
11389 dstmem = change_address (dst, QImode, destreg);
11390 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11392 if (align <= 1 || count == 0)
11394 rtx label = ix86_expand_aligntest (countreg, 1);
11395 srcmem = change_address (src, QImode, srcreg);
11396 dstmem = change_address (dst, QImode, destreg);
11397 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11398 emit_label (label);
11399 LABEL_NUSES (label) = 1;
11403 return 1;
11406 /* Expand string clear operation (bzero). Use i386 string operations when
11407 profitable. expand_movmem contains similar code. */
11409 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11411 rtx destreg, zeroreg, countreg, destexp;
11412 enum machine_mode counter_mode;
11413 HOST_WIDE_INT align = 0;
11414 unsigned HOST_WIDE_INT count = 0;
11416 if (GET_CODE (align_exp) == CONST_INT)
11417 align = INTVAL (align_exp);
11419 /* Can't use any of this if the user has appropriated esi. */
11420 if (global_regs[4])
11421 return 0;
11423 /* This simple hack avoids all inlining code and simplifies code below. */
11424 if (!TARGET_ALIGN_STRINGOPS)
11425 align = 32;
11427 if (GET_CODE (count_exp) == CONST_INT)
11429 count = INTVAL (count_exp);
11430 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11431 return 0;
11433 /* Figure out proper mode for counter. For 32bits it is always SImode,
11434 for 64bits use SImode when possible, otherwise DImode.
11435 Set count to number of bytes copied when known at compile time. */
11436 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11437 || x86_64_zero_extended_value (count_exp))
11438 counter_mode = SImode;
11439 else
11440 counter_mode = DImode;
11442 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11443 if (destreg != XEXP (dst, 0))
11444 dst = replace_equiv_address_nv (dst, destreg);
11446 emit_insn (gen_cld ());
11448 /* When optimizing for size emit simple rep ; movsb instruction for
11449 counts not divisible by 4. */
11451 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11453 countreg = ix86_zero_extend_to_Pmode (count_exp);
11454 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11455 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11456 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11458 else if (count != 0
11459 && (align >= 8
11460 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11461 || optimize_size || count < (unsigned int) 64))
11463 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11464 unsigned HOST_WIDE_INT offset = 0;
11466 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11467 if (count & ~(size - 1))
11469 countreg = copy_to_mode_reg (counter_mode,
11470 GEN_INT ((count >> (size == 4 ? 2 : 3))
11471 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11472 countreg = ix86_zero_extend_to_Pmode (countreg);
11473 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11474 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11475 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11476 offset = count & ~(size - 1);
11478 if (size == 8 && (count & 0x04))
11480 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11481 offset);
11482 emit_insn (gen_strset (destreg, mem,
11483 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11484 offset += 4;
11486 if (count & 0x02)
11488 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11489 offset);
11490 emit_insn (gen_strset (destreg, mem,
11491 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11492 offset += 2;
11494 if (count & 0x01)
11496 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11497 offset);
11498 emit_insn (gen_strset (destreg, mem,
11499 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11502 else
11504 rtx countreg2;
11505 rtx label = NULL;
11506 /* Compute desired alignment of the string operation. */
11507 int desired_alignment = (TARGET_PENTIUMPRO
11508 && (count == 0 || count >= (unsigned int) 260)
11509 ? 8 : UNITS_PER_WORD);
11511 /* In case we don't know anything about the alignment, default to
11512 library version, since it is usually equally fast and result in
11513 shorter code.
11515 Also emit call when we know that the count is large and call overhead
11516 will not be important. */
11517 if (!TARGET_INLINE_ALL_STRINGOPS
11518 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11519 return 0;
11521 if (TARGET_SINGLE_STRINGOP)
11522 emit_insn (gen_cld ());
11524 countreg2 = gen_reg_rtx (Pmode);
11525 countreg = copy_to_mode_reg (counter_mode, count_exp);
11526 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11527 /* Get rid of MEM_OFFSET, it won't be accurate. */
11528 dst = change_address (dst, BLKmode, destreg);
11530 if (count == 0 && align < desired_alignment)
11532 label = gen_label_rtx ();
11533 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11534 LEU, 0, counter_mode, 1, label);
11536 if (align <= 1)
11538 rtx label = ix86_expand_aligntest (destreg, 1);
11539 emit_insn (gen_strset (destreg, dst,
11540 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11541 ix86_adjust_counter (countreg, 1);
11542 emit_label (label);
11543 LABEL_NUSES (label) = 1;
11545 if (align <= 2)
11547 rtx label = ix86_expand_aligntest (destreg, 2);
11548 emit_insn (gen_strset (destreg, dst,
11549 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11550 ix86_adjust_counter (countreg, 2);
11551 emit_label (label);
11552 LABEL_NUSES (label) = 1;
11554 if (align <= 4 && desired_alignment > 4)
11556 rtx label = ix86_expand_aligntest (destreg, 4);
11557 emit_insn (gen_strset (destreg, dst,
11558 (TARGET_64BIT
11559 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11560 : zeroreg)));
11561 ix86_adjust_counter (countreg, 4);
11562 emit_label (label);
11563 LABEL_NUSES (label) = 1;
11566 if (label && desired_alignment > 4 && !TARGET_64BIT)
11568 emit_label (label);
11569 LABEL_NUSES (label) = 1;
11570 label = NULL_RTX;
11573 if (!TARGET_SINGLE_STRINGOP)
11574 emit_insn (gen_cld ());
11575 if (TARGET_64BIT)
11577 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11578 GEN_INT (3)));
11579 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11581 else
11583 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11584 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11586 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11587 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11589 if (label)
11591 emit_label (label);
11592 LABEL_NUSES (label) = 1;
11595 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11596 emit_insn (gen_strset (destreg, dst,
11597 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11598 if (TARGET_64BIT && (align <= 4 || count == 0))
11600 rtx label = ix86_expand_aligntest (countreg, 4);
11601 emit_insn (gen_strset (destreg, dst,
11602 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11603 emit_label (label);
11604 LABEL_NUSES (label) = 1;
11606 if (align > 2 && count != 0 && (count & 2))
11607 emit_insn (gen_strset (destreg, dst,
11608 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11609 if (align <= 2 || count == 0)
11611 rtx label = ix86_expand_aligntest (countreg, 2);
11612 emit_insn (gen_strset (destreg, dst,
11613 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11614 emit_label (label);
11615 LABEL_NUSES (label) = 1;
11617 if (align > 1 && count != 0 && (count & 1))
11618 emit_insn (gen_strset (destreg, dst,
11619 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11620 if (align <= 1 || count == 0)
11622 rtx label = ix86_expand_aligntest (countreg, 1);
11623 emit_insn (gen_strset (destreg, dst,
11624 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11625 emit_label (label);
11626 LABEL_NUSES (label) = 1;
11629 return 1;
11632 /* Expand strlen. */
11634 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11636 rtx addr, scratch1, scratch2, scratch3, scratch4;
11638 /* The generic case of strlen expander is long. Avoid it's
11639 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11641 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11642 && !TARGET_INLINE_ALL_STRINGOPS
11643 && !optimize_size
11644 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11645 return 0;
11647 addr = force_reg (Pmode, XEXP (src, 0));
11648 scratch1 = gen_reg_rtx (Pmode);
11650 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11651 && !optimize_size)
11653 /* Well it seems that some optimizer does not combine a call like
11654 foo(strlen(bar), strlen(bar));
11655 when the move and the subtraction is done here. It does calculate
11656 the length just once when these instructions are done inside of
11657 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11658 often used and I use one fewer register for the lifetime of
11659 output_strlen_unroll() this is better. */
11661 emit_move_insn (out, addr);
11663 ix86_expand_strlensi_unroll_1 (out, src, align);
11665 /* strlensi_unroll_1 returns the address of the zero at the end of
11666 the string, like memchr(), so compute the length by subtracting
11667 the start address. */
11668 if (TARGET_64BIT)
11669 emit_insn (gen_subdi3 (out, out, addr));
11670 else
11671 emit_insn (gen_subsi3 (out, out, addr));
11673 else
11675 rtx unspec;
11676 scratch2 = gen_reg_rtx (Pmode);
11677 scratch3 = gen_reg_rtx (Pmode);
11678 scratch4 = force_reg (Pmode, constm1_rtx);
11680 emit_move_insn (scratch3, addr);
11681 eoschar = force_reg (QImode, eoschar);
11683 emit_insn (gen_cld ());
11684 src = replace_equiv_address_nv (src, scratch3);
11686 /* If .md starts supporting :P, this can be done in .md. */
11687 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11688 scratch4), UNSPEC_SCAS);
11689 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11690 if (TARGET_64BIT)
11692 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11693 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11695 else
11697 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11698 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11701 return 1;
11704 /* Expand the appropriate insns for doing strlen if not just doing
11705 repnz; scasb
11707 out = result, initialized with the start address
11708 align_rtx = alignment of the address.
11709 scratch = scratch register, initialized with the startaddress when
11710 not aligned, otherwise undefined
11712 This is just the body. It needs the initializations mentioned above and
11713 some address computing at the end. These things are done in i386.md. */
11715 static void
11716 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11718 int align;
11719 rtx tmp;
11720 rtx align_2_label = NULL_RTX;
11721 rtx align_3_label = NULL_RTX;
11722 rtx align_4_label = gen_label_rtx ();
11723 rtx end_0_label = gen_label_rtx ();
11724 rtx mem;
11725 rtx tmpreg = gen_reg_rtx (SImode);
11726 rtx scratch = gen_reg_rtx (SImode);
11727 rtx cmp;
11729 align = 0;
11730 if (GET_CODE (align_rtx) == CONST_INT)
11731 align = INTVAL (align_rtx);
11733 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11735 /* Is there a known alignment and is it less than 4? */
11736 if (align < 4)
11738 rtx scratch1 = gen_reg_rtx (Pmode);
11739 emit_move_insn (scratch1, out);
11740 /* Is there a known alignment and is it not 2? */
11741 if (align != 2)
11743 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11744 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11746 /* Leave just the 3 lower bits. */
11747 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11748 NULL_RTX, 0, OPTAB_WIDEN);
11750 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11751 Pmode, 1, align_4_label);
11752 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11753 Pmode, 1, align_2_label);
11754 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11755 Pmode, 1, align_3_label);
11757 else
11759 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11760 check if is aligned to 4 - byte. */
11762 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11763 NULL_RTX, 0, OPTAB_WIDEN);
11765 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11766 Pmode, 1, align_4_label);
11769 mem = change_address (src, QImode, out);
11771 /* Now compare the bytes. */
11773 /* Compare the first n unaligned byte on a byte per byte basis. */
11774 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11775 QImode, 1, end_0_label);
11777 /* Increment the address. */
11778 if (TARGET_64BIT)
11779 emit_insn (gen_adddi3 (out, out, const1_rtx));
11780 else
11781 emit_insn (gen_addsi3 (out, out, const1_rtx));
11783 /* Not needed with an alignment of 2 */
11784 if (align != 2)
11786 emit_label (align_2_label);
11788 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11789 end_0_label);
11791 if (TARGET_64BIT)
11792 emit_insn (gen_adddi3 (out, out, const1_rtx));
11793 else
11794 emit_insn (gen_addsi3 (out, out, const1_rtx));
11796 emit_label (align_3_label);
11799 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11800 end_0_label);
11802 if (TARGET_64BIT)
11803 emit_insn (gen_adddi3 (out, out, const1_rtx));
11804 else
11805 emit_insn (gen_addsi3 (out, out, const1_rtx));
11808 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11809 align this loop. It gives only huge programs, but does not help to
11810 speed up. */
11811 emit_label (align_4_label);
11813 mem = change_address (src, SImode, out);
11814 emit_move_insn (scratch, mem);
11815 if (TARGET_64BIT)
11816 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11817 else
11818 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11820 /* This formula yields a nonzero result iff one of the bytes is zero.
11821 This saves three branches inside loop and many cycles. */
11823 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11824 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11825 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11826 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11827 gen_int_mode (0x80808080, SImode)));
11828 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11829 align_4_label);
11831 if (TARGET_CMOVE)
11833 rtx reg = gen_reg_rtx (SImode);
11834 rtx reg2 = gen_reg_rtx (Pmode);
11835 emit_move_insn (reg, tmpreg);
11836 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11838 /* If zero is not in the first two bytes, move two bytes forward. */
11839 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11840 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11841 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11842 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11843 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11844 reg,
11845 tmpreg)));
11846 /* Emit lea manually to avoid clobbering of flags. */
11847 emit_insn (gen_rtx_SET (SImode, reg2,
11848 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11850 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11851 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11852 emit_insn (gen_rtx_SET (VOIDmode, out,
11853 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11854 reg2,
11855 out)));
11858 else
11860 rtx end_2_label = gen_label_rtx ();
11861 /* Is zero in the first two bytes? */
11863 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11864 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11865 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11866 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11867 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11868 pc_rtx);
11869 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11870 JUMP_LABEL (tmp) = end_2_label;
11872 /* Not in the first two. Move two bytes forward. */
11873 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11874 if (TARGET_64BIT)
11875 emit_insn (gen_adddi3 (out, out, const2_rtx));
11876 else
11877 emit_insn (gen_addsi3 (out, out, const2_rtx));
11879 emit_label (end_2_label);
11883 /* Avoid branch in fixing the byte. */
11884 tmpreg = gen_lowpart (QImode, tmpreg);
11885 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11886 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11887 if (TARGET_64BIT)
11888 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11889 else
11890 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11892 emit_label (end_0_label);
11895 void
11896 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11897 rtx callarg2 ATTRIBUTE_UNUSED,
11898 rtx pop, int sibcall)
11900 rtx use = NULL, call;
11902 if (pop == const0_rtx)
11903 pop = NULL;
11904 if (TARGET_64BIT && pop)
11905 abort ();
11907 #if TARGET_MACHO
11908 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11909 fnaddr = machopic_indirect_call_target (fnaddr);
11910 #else
11911 /* Static functions and indirect calls don't need the pic register. */
11912 if (! TARGET_64BIT && flag_pic
11913 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11914 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11915 use_reg (&use, pic_offset_table_rtx);
11917 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11919 rtx al = gen_rtx_REG (QImode, 0);
11920 emit_move_insn (al, callarg2);
11921 use_reg (&use, al);
11923 #endif /* TARGET_MACHO */
11925 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11927 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11928 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11930 if (sibcall && TARGET_64BIT
11931 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11933 rtx addr;
11934 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11935 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11936 emit_move_insn (fnaddr, addr);
11937 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11940 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11941 if (retval)
11942 call = gen_rtx_SET (VOIDmode, retval, call);
11943 if (pop)
11945 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11946 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11947 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11950 call = emit_call_insn (call);
11951 if (use)
11952 CALL_INSN_FUNCTION_USAGE (call) = use;
11956 /* Clear stack slot assignments remembered from previous functions.
11957 This is called from INIT_EXPANDERS once before RTL is emitted for each
11958 function. */
11960 static struct machine_function *
11961 ix86_init_machine_status (void)
11963 struct machine_function *f;
11965 f = ggc_alloc_cleared (sizeof (struct machine_function));
11966 f->use_fast_prologue_epilogue_nregs = -1;
11968 return f;
11971 /* Return a MEM corresponding to a stack slot with mode MODE.
11972 Allocate a new slot if necessary.
11974 The RTL for a function can have several slots available: N is
11975 which slot to use. */
11978 assign_386_stack_local (enum machine_mode mode, int n)
11980 struct stack_local_entry *s;
11982 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11983 abort ();
11985 for (s = ix86_stack_locals; s; s = s->next)
11986 if (s->mode == mode && s->n == n)
11987 return s->rtl;
11989 s = (struct stack_local_entry *)
11990 ggc_alloc (sizeof (struct stack_local_entry));
11991 s->n = n;
11992 s->mode = mode;
11993 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11995 s->next = ix86_stack_locals;
11996 ix86_stack_locals = s;
11997 return s->rtl;
12000 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12002 static GTY(()) rtx ix86_tls_symbol;
12004 ix86_tls_get_addr (void)
12007 if (!ix86_tls_symbol)
12009 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12010 (TARGET_GNU_TLS && !TARGET_64BIT)
12011 ? "___tls_get_addr"
12012 : "__tls_get_addr");
12015 return ix86_tls_symbol;
12018 /* Calculate the length of the memory address in the instruction
12019 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12021 static int
12022 memory_address_length (rtx addr)
12024 struct ix86_address parts;
12025 rtx base, index, disp;
12026 int len;
12028 if (GET_CODE (addr) == PRE_DEC
12029 || GET_CODE (addr) == POST_INC
12030 || GET_CODE (addr) == PRE_MODIFY
12031 || GET_CODE (addr) == POST_MODIFY)
12032 return 0;
12034 if (! ix86_decompose_address (addr, &parts))
12035 abort ();
12037 base = parts.base;
12038 index = parts.index;
12039 disp = parts.disp;
12040 len = 0;
12042 /* Rule of thumb:
12043 - esp as the base always wants an index,
12044 - ebp as the base always wants a displacement. */
12046 /* Register Indirect. */
12047 if (base && !index && !disp)
12049 /* esp (for its index) and ebp (for its displacement) need
12050 the two-byte modrm form. */
12051 if (addr == stack_pointer_rtx
12052 || addr == arg_pointer_rtx
12053 || addr == frame_pointer_rtx
12054 || addr == hard_frame_pointer_rtx)
12055 len = 1;
12058 /* Direct Addressing. */
12059 else if (disp && !base && !index)
12060 len = 4;
12062 else
12064 /* Find the length of the displacement constant. */
12065 if (disp)
12067 if (GET_CODE (disp) == CONST_INT
12068 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12069 && base)
12070 len = 1;
12071 else
12072 len = 4;
12074 /* ebp always wants a displacement. */
12075 else if (base == hard_frame_pointer_rtx)
12076 len = 1;
12078 /* An index requires the two-byte modrm form.... */
12079 if (index
12080 /* ...like esp, which always wants an index. */
12081 || base == stack_pointer_rtx
12082 || base == arg_pointer_rtx
12083 || base == frame_pointer_rtx)
12084 len += 1;
12087 return len;
12090 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12091 is set, expect that insn have 8bit immediate alternative. */
12093 ix86_attr_length_immediate_default (rtx insn, int shortform)
12095 int len = 0;
12096 int i;
12097 extract_insn_cached (insn);
12098 for (i = recog_data.n_operands - 1; i >= 0; --i)
12099 if (CONSTANT_P (recog_data.operand[i]))
12101 if (len)
12102 abort ();
12103 if (shortform
12104 && GET_CODE (recog_data.operand[i]) == CONST_INT
12105 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12106 len = 1;
12107 else
12109 switch (get_attr_mode (insn))
12111 case MODE_QI:
12112 len+=1;
12113 break;
12114 case MODE_HI:
12115 len+=2;
12116 break;
12117 case MODE_SI:
12118 len+=4;
12119 break;
12120 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12121 case MODE_DI:
12122 len+=4;
12123 break;
12124 default:
12125 fatal_insn ("unknown insn mode", insn);
12129 return len;
12131 /* Compute default value for "length_address" attribute. */
12133 ix86_attr_length_address_default (rtx insn)
12135 int i;
12137 if (get_attr_type (insn) == TYPE_LEA)
12139 rtx set = PATTERN (insn);
12140 if (GET_CODE (set) == SET)
12142 else if (GET_CODE (set) == PARALLEL
12143 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12144 set = XVECEXP (set, 0, 0);
12145 else
12147 #ifdef ENABLE_CHECKING
12148 abort ();
12149 #endif
12150 return 0;
12153 return memory_address_length (SET_SRC (set));
12156 extract_insn_cached (insn);
12157 for (i = recog_data.n_operands - 1; i >= 0; --i)
12158 if (GET_CODE (recog_data.operand[i]) == MEM)
12160 return memory_address_length (XEXP (recog_data.operand[i], 0));
12161 break;
12163 return 0;
12166 /* Return the maximum number of instructions a cpu can issue. */
12168 static int
12169 ix86_issue_rate (void)
12171 switch (ix86_tune)
12173 case PROCESSOR_PENTIUM:
12174 case PROCESSOR_K6:
12175 return 2;
12177 case PROCESSOR_PENTIUMPRO:
12178 case PROCESSOR_PENTIUM4:
12179 case PROCESSOR_ATHLON:
12180 case PROCESSOR_K8:
12181 case PROCESSOR_NOCONA:
12182 return 3;
12184 default:
12185 return 1;
12189 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12190 by DEP_INSN and nothing set by DEP_INSN. */
12192 static int
12193 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12195 rtx set, set2;
12197 /* Simplify the test for uninteresting insns. */
12198 if (insn_type != TYPE_SETCC
12199 && insn_type != TYPE_ICMOV
12200 && insn_type != TYPE_FCMOV
12201 && insn_type != TYPE_IBR)
12202 return 0;
12204 if ((set = single_set (dep_insn)) != 0)
12206 set = SET_DEST (set);
12207 set2 = NULL_RTX;
12209 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12210 && XVECLEN (PATTERN (dep_insn), 0) == 2
12211 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12212 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12214 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12215 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12217 else
12218 return 0;
12220 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12221 return 0;
12223 /* This test is true if the dependent insn reads the flags but
12224 not any other potentially set register. */
12225 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12226 return 0;
12228 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12229 return 0;
12231 return 1;
12234 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12235 address with operands set by DEP_INSN. */
12237 static int
12238 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12240 rtx addr;
12242 if (insn_type == TYPE_LEA
12243 && TARGET_PENTIUM)
12245 addr = PATTERN (insn);
12246 if (GET_CODE (addr) == SET)
12248 else if (GET_CODE (addr) == PARALLEL
12249 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12250 addr = XVECEXP (addr, 0, 0);
12251 else
12252 abort ();
12253 addr = SET_SRC (addr);
12255 else
12257 int i;
12258 extract_insn_cached (insn);
12259 for (i = recog_data.n_operands - 1; i >= 0; --i)
12260 if (GET_CODE (recog_data.operand[i]) == MEM)
12262 addr = XEXP (recog_data.operand[i], 0);
12263 goto found;
12265 return 0;
12266 found:;
12269 return modified_in_p (addr, dep_insn);
12272 static int
12273 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12275 enum attr_type insn_type, dep_insn_type;
12276 enum attr_memory memory;
12277 rtx set, set2;
12278 int dep_insn_code_number;
12280 /* Anti and output dependencies have zero cost on all CPUs. */
12281 if (REG_NOTE_KIND (link) != 0)
12282 return 0;
12284 dep_insn_code_number = recog_memoized (dep_insn);
12286 /* If we can't recognize the insns, we can't really do anything. */
12287 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12288 return cost;
12290 insn_type = get_attr_type (insn);
12291 dep_insn_type = get_attr_type (dep_insn);
12293 switch (ix86_tune)
12295 case PROCESSOR_PENTIUM:
12296 /* Address Generation Interlock adds a cycle of latency. */
12297 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12298 cost += 1;
12300 /* ??? Compares pair with jump/setcc. */
12301 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12302 cost = 0;
12304 /* Floating point stores require value to be ready one cycle earlier. */
12305 if (insn_type == TYPE_FMOV
12306 && get_attr_memory (insn) == MEMORY_STORE
12307 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12308 cost += 1;
12309 break;
12311 case PROCESSOR_PENTIUMPRO:
12312 memory = get_attr_memory (insn);
12314 /* INT->FP conversion is expensive. */
12315 if (get_attr_fp_int_src (dep_insn))
12316 cost += 5;
12318 /* There is one cycle extra latency between an FP op and a store. */
12319 if (insn_type == TYPE_FMOV
12320 && (set = single_set (dep_insn)) != NULL_RTX
12321 && (set2 = single_set (insn)) != NULL_RTX
12322 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12323 && GET_CODE (SET_DEST (set2)) == MEM)
12324 cost += 1;
12326 /* Show ability of reorder buffer to hide latency of load by executing
12327 in parallel with previous instruction in case
12328 previous instruction is not needed to compute the address. */
12329 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12330 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12332 /* Claim moves to take one cycle, as core can issue one load
12333 at time and the next load can start cycle later. */
12334 if (dep_insn_type == TYPE_IMOV
12335 || dep_insn_type == TYPE_FMOV)
12336 cost = 1;
12337 else if (cost > 1)
12338 cost--;
12340 break;
12342 case PROCESSOR_K6:
12343 memory = get_attr_memory (insn);
12345 /* The esp dependency is resolved before the instruction is really
12346 finished. */
12347 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12348 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12349 return 1;
12351 /* INT->FP conversion is expensive. */
12352 if (get_attr_fp_int_src (dep_insn))
12353 cost += 5;
12355 /* Show ability of reorder buffer to hide latency of load by executing
12356 in parallel with previous instruction in case
12357 previous instruction is not needed to compute the address. */
12358 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12359 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12361 /* Claim moves to take one cycle, as core can issue one load
12362 at time and the next load can start cycle later. */
12363 if (dep_insn_type == TYPE_IMOV
12364 || dep_insn_type == TYPE_FMOV)
12365 cost = 1;
12366 else if (cost > 2)
12367 cost -= 2;
12368 else
12369 cost = 1;
12371 break;
12373 case PROCESSOR_ATHLON:
12374 case PROCESSOR_K8:
12375 memory = get_attr_memory (insn);
12377 /* Show ability of reorder buffer to hide latency of load by executing
12378 in parallel with previous instruction in case
12379 previous instruction is not needed to compute the address. */
12380 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12381 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12383 enum attr_unit unit = get_attr_unit (insn);
12384 int loadcost = 3;
12386 /* Because of the difference between the length of integer and
12387 floating unit pipeline preparation stages, the memory operands
12388 for floating point are cheaper.
12390 ??? For Athlon it the difference is most probably 2. */
12391 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12392 loadcost = 3;
12393 else
12394 loadcost = TARGET_ATHLON ? 2 : 0;
12396 if (cost >= loadcost)
12397 cost -= loadcost;
12398 else
12399 cost = 0;
12402 default:
12403 break;
12406 return cost;
12409 /* How many alternative schedules to try. This should be as wide as the
12410 scheduling freedom in the DFA, but no wider. Making this value too
12411 large results extra work for the scheduler. */
12413 static int
12414 ia32_multipass_dfa_lookahead (void)
12416 if (ix86_tune == PROCESSOR_PENTIUM)
12417 return 2;
12419 if (ix86_tune == PROCESSOR_PENTIUMPRO
12420 || ix86_tune == PROCESSOR_K6)
12421 return 1;
12423 else
12424 return 0;
12428 /* Compute the alignment given to a constant that is being placed in memory.
12429 EXP is the constant and ALIGN is the alignment that the object would
12430 ordinarily have.
12431 The value of this function is used instead of that alignment to align
12432 the object. */
12435 ix86_constant_alignment (tree exp, int align)
12437 if (TREE_CODE (exp) == REAL_CST)
12439 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12440 return 64;
12441 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12442 return 128;
12444 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12445 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12446 return BITS_PER_WORD;
12448 return align;
12451 /* Compute the alignment for a static variable.
12452 TYPE is the data type, and ALIGN is the alignment that
12453 the object would ordinarily have. The value of this function is used
12454 instead of that alignment to align the object. */
12457 ix86_data_alignment (tree type, int align)
12459 if (AGGREGATE_TYPE_P (type)
12460 && TYPE_SIZE (type)
12461 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12462 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12463 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12464 return 256;
12466 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12467 to 16byte boundary. */
12468 if (TARGET_64BIT)
12470 if (AGGREGATE_TYPE_P (type)
12471 && TYPE_SIZE (type)
12472 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12473 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12474 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12475 return 128;
12478 if (TREE_CODE (type) == ARRAY_TYPE)
12480 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12481 return 64;
12482 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12483 return 128;
12485 else if (TREE_CODE (type) == COMPLEX_TYPE)
12488 if (TYPE_MODE (type) == DCmode && align < 64)
12489 return 64;
12490 if (TYPE_MODE (type) == XCmode && align < 128)
12491 return 128;
12493 else if ((TREE_CODE (type) == RECORD_TYPE
12494 || TREE_CODE (type) == UNION_TYPE
12495 || TREE_CODE (type) == QUAL_UNION_TYPE)
12496 && TYPE_FIELDS (type))
12498 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12499 return 64;
12500 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12501 return 128;
12503 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12504 || TREE_CODE (type) == INTEGER_TYPE)
12506 if (TYPE_MODE (type) == DFmode && align < 64)
12507 return 64;
12508 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12509 return 128;
12512 return align;
12515 /* Compute the alignment for a local variable.
12516 TYPE is the data type, and ALIGN is the alignment that
12517 the object would ordinarily have. The value of this macro is used
12518 instead of that alignment to align the object. */
12521 ix86_local_alignment (tree type, int align)
12523 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12524 to 16byte boundary. */
12525 if (TARGET_64BIT)
12527 if (AGGREGATE_TYPE_P (type)
12528 && TYPE_SIZE (type)
12529 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12530 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12531 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12532 return 128;
12534 if (TREE_CODE (type) == ARRAY_TYPE)
12536 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12537 return 64;
12538 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12539 return 128;
12541 else if (TREE_CODE (type) == COMPLEX_TYPE)
12543 if (TYPE_MODE (type) == DCmode && align < 64)
12544 return 64;
12545 if (TYPE_MODE (type) == XCmode && align < 128)
12546 return 128;
12548 else if ((TREE_CODE (type) == RECORD_TYPE
12549 || TREE_CODE (type) == UNION_TYPE
12550 || TREE_CODE (type) == QUAL_UNION_TYPE)
12551 && TYPE_FIELDS (type))
12553 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12554 return 64;
12555 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12556 return 128;
12558 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12559 || TREE_CODE (type) == INTEGER_TYPE)
12562 if (TYPE_MODE (type) == DFmode && align < 64)
12563 return 64;
12564 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12565 return 128;
12567 return align;
12570 /* Emit RTL insns to initialize the variable parts of a trampoline.
12571 FNADDR is an RTX for the address of the function's pure code.
12572 CXT is an RTX for the static chain value for the function. */
12573 void
12574 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12576 if (!TARGET_64BIT)
12578 /* Compute offset from the end of the jmp to the target function. */
12579 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12580 plus_constant (tramp, 10),
12581 NULL_RTX, 1, OPTAB_DIRECT);
12582 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12583 gen_int_mode (0xb9, QImode));
12584 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12585 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12586 gen_int_mode (0xe9, QImode));
12587 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12589 else
12591 int offset = 0;
12592 /* Try to load address using shorter movl instead of movabs.
12593 We may want to support movq for kernel mode, but kernel does not use
12594 trampolines at the moment. */
12595 if (x86_64_zero_extended_value (fnaddr))
12597 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12598 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12599 gen_int_mode (0xbb41, HImode));
12600 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12601 gen_lowpart (SImode, fnaddr));
12602 offset += 6;
12604 else
12606 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12607 gen_int_mode (0xbb49, HImode));
12608 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12609 fnaddr);
12610 offset += 10;
12612 /* Load static chain using movabs to r10. */
12613 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12614 gen_int_mode (0xba49, HImode));
12615 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12616 cxt);
12617 offset += 10;
12618 /* Jump to the r11 */
12619 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12620 gen_int_mode (0xff49, HImode));
12621 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12622 gen_int_mode (0xe3, QImode));
12623 offset += 3;
12624 if (offset > TRAMPOLINE_SIZE)
12625 abort ();
12628 #ifdef TRANSFER_FROM_TRAMPOLINE
12629 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12630 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12631 #endif
12634 #define def_builtin(MASK, NAME, TYPE, CODE) \
12635 do { \
12636 if ((MASK) & target_flags \
12637 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12638 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12639 NULL, NULL_TREE); \
12640 } while (0)
12642 struct builtin_description
12644 const unsigned int mask;
12645 const enum insn_code icode;
12646 const char *const name;
12647 const enum ix86_builtins code;
12648 const enum rtx_code comparison;
12649 const unsigned int flag;
12652 static const struct builtin_description bdesc_comi[] =
12654 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12655 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12656 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12657 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12658 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12659 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12660 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12661 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12662 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12663 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12664 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12665 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12666 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12667 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12668 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12669 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12670 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12671 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12672 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12673 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12674 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12675 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12676 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12677 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12680 static const struct builtin_description bdesc_2arg[] =
12682 /* SSE */
12683 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12684 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12685 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12686 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12687 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12688 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12689 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12690 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12693 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12694 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12695 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12696 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12697 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12698 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12699 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12700 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12701 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12702 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12703 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12704 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12705 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12706 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12707 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12708 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12709 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12710 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12711 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12713 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12714 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12715 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12716 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12718 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12719 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12720 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12721 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12723 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12724 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12725 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12726 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12727 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12729 /* MMX */
12730 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12731 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12732 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12733 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12734 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12735 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12736 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12737 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12739 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12740 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12741 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12742 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12743 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12744 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12745 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12746 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12749 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12750 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12752 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12753 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12754 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12755 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12757 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12758 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12760 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12761 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12762 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12763 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12764 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12765 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12767 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12768 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12769 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12770 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12772 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12773 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12774 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12775 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12776 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12777 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12779 /* Special. */
12780 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12781 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12782 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12784 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12785 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12786 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12788 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12789 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12790 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12791 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12792 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12793 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12795 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12796 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12797 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12798 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12799 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12800 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12802 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12803 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12804 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12805 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12807 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12810 /* SSE2 */
12811 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12821 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12822 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12823 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12824 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12825 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12826 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12827 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12828 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12829 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12830 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12831 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12832 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12833 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12834 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12835 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12836 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12837 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12838 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12839 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12841 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12855 /* SSE2 MMX */
12856 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12859 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12860 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12865 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12866 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12867 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12868 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12869 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12870 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12871 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12872 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12938 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12942 /* SSE3 MMX */
12943 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12944 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12945 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12946 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12947 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12948 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12951 static const struct builtin_description bdesc_1arg[] =
12953 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12954 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12956 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12957 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12958 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12960 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12961 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12962 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12963 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12964 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12965 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12987 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12988 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12996 /* SSE3 */
12997 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12998 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12999 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13002 void
13003 ix86_init_builtins (void)
13005 if (TARGET_MMX)
13006 ix86_init_mmx_sse_builtins ();
13009 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13010 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13011 builtins. */
13012 static void
13013 ix86_init_mmx_sse_builtins (void)
13015 const struct builtin_description * d;
13016 size_t i;
13018 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13019 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13020 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13021 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13022 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13023 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13024 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13025 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13026 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13027 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13029 tree pchar_type_node = build_pointer_type (char_type_node);
13030 tree pcchar_type_node = build_pointer_type (
13031 build_type_variant (char_type_node, 1, 0));
13032 tree pfloat_type_node = build_pointer_type (float_type_node);
13033 tree pcfloat_type_node = build_pointer_type (
13034 build_type_variant (float_type_node, 1, 0));
13035 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13036 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13037 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13039 /* Comparisons. */
13040 tree int_ftype_v4sf_v4sf
13041 = build_function_type_list (integer_type_node,
13042 V4SF_type_node, V4SF_type_node, NULL_TREE);
13043 tree v4si_ftype_v4sf_v4sf
13044 = build_function_type_list (V4SI_type_node,
13045 V4SF_type_node, V4SF_type_node, NULL_TREE);
13046 /* MMX/SSE/integer conversions. */
13047 tree int_ftype_v4sf
13048 = build_function_type_list (integer_type_node,
13049 V4SF_type_node, NULL_TREE);
13050 tree int64_ftype_v4sf
13051 = build_function_type_list (long_long_integer_type_node,
13052 V4SF_type_node, NULL_TREE);
13053 tree int_ftype_v8qi
13054 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13055 tree v4sf_ftype_v4sf_int
13056 = build_function_type_list (V4SF_type_node,
13057 V4SF_type_node, integer_type_node, NULL_TREE);
13058 tree v4sf_ftype_v4sf_int64
13059 = build_function_type_list (V4SF_type_node,
13060 V4SF_type_node, long_long_integer_type_node,
13061 NULL_TREE);
13062 tree v4sf_ftype_v4sf_v2si
13063 = build_function_type_list (V4SF_type_node,
13064 V4SF_type_node, V2SI_type_node, NULL_TREE);
13065 tree int_ftype_v4hi_int
13066 = build_function_type_list (integer_type_node,
13067 V4HI_type_node, integer_type_node, NULL_TREE);
13068 tree v4hi_ftype_v4hi_int_int
13069 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13070 integer_type_node, integer_type_node,
13071 NULL_TREE);
13072 /* Miscellaneous. */
13073 tree v8qi_ftype_v4hi_v4hi
13074 = build_function_type_list (V8QI_type_node,
13075 V4HI_type_node, V4HI_type_node, NULL_TREE);
13076 tree v4hi_ftype_v2si_v2si
13077 = build_function_type_list (V4HI_type_node,
13078 V2SI_type_node, V2SI_type_node, NULL_TREE);
13079 tree v4sf_ftype_v4sf_v4sf_int
13080 = build_function_type_list (V4SF_type_node,
13081 V4SF_type_node, V4SF_type_node,
13082 integer_type_node, NULL_TREE);
13083 tree v2si_ftype_v4hi_v4hi
13084 = build_function_type_list (V2SI_type_node,
13085 V4HI_type_node, V4HI_type_node, NULL_TREE);
13086 tree v4hi_ftype_v4hi_int
13087 = build_function_type_list (V4HI_type_node,
13088 V4HI_type_node, integer_type_node, NULL_TREE);
13089 tree v4hi_ftype_v4hi_di
13090 = build_function_type_list (V4HI_type_node,
13091 V4HI_type_node, long_long_unsigned_type_node,
13092 NULL_TREE);
13093 tree v2si_ftype_v2si_di
13094 = build_function_type_list (V2SI_type_node,
13095 V2SI_type_node, long_long_unsigned_type_node,
13096 NULL_TREE);
13097 tree void_ftype_void
13098 = build_function_type (void_type_node, void_list_node);
13099 tree void_ftype_unsigned
13100 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13101 tree void_ftype_unsigned_unsigned
13102 = build_function_type_list (void_type_node, unsigned_type_node,
13103 unsigned_type_node, NULL_TREE);
13104 tree void_ftype_pcvoid_unsigned_unsigned
13105 = build_function_type_list (void_type_node, const_ptr_type_node,
13106 unsigned_type_node, unsigned_type_node,
13107 NULL_TREE);
13108 tree unsigned_ftype_void
13109 = build_function_type (unsigned_type_node, void_list_node);
13110 tree di_ftype_void
13111 = build_function_type (long_long_unsigned_type_node, void_list_node);
13112 tree v4sf_ftype_void
13113 = build_function_type (V4SF_type_node, void_list_node);
13114 tree v2si_ftype_v4sf
13115 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13116 /* Loads/stores. */
13117 tree void_ftype_v8qi_v8qi_pchar
13118 = build_function_type_list (void_type_node,
13119 V8QI_type_node, V8QI_type_node,
13120 pchar_type_node, NULL_TREE);
13121 tree v4sf_ftype_pcfloat
13122 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13123 /* @@@ the type is bogus */
13124 tree v4sf_ftype_v4sf_pv2si
13125 = build_function_type_list (V4SF_type_node,
13126 V4SF_type_node, pv2si_type_node, NULL_TREE);
13127 tree void_ftype_pv2si_v4sf
13128 = build_function_type_list (void_type_node,
13129 pv2si_type_node, V4SF_type_node, NULL_TREE);
13130 tree void_ftype_pfloat_v4sf
13131 = build_function_type_list (void_type_node,
13132 pfloat_type_node, V4SF_type_node, NULL_TREE);
13133 tree void_ftype_pdi_di
13134 = build_function_type_list (void_type_node,
13135 pdi_type_node, long_long_unsigned_type_node,
13136 NULL_TREE);
13137 tree void_ftype_pv2di_v2di
13138 = build_function_type_list (void_type_node,
13139 pv2di_type_node, V2DI_type_node, NULL_TREE);
13140 /* Normal vector unops. */
13141 tree v4sf_ftype_v4sf
13142 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13144 /* Normal vector binops. */
13145 tree v4sf_ftype_v4sf_v4sf
13146 = build_function_type_list (V4SF_type_node,
13147 V4SF_type_node, V4SF_type_node, NULL_TREE);
13148 tree v8qi_ftype_v8qi_v8qi
13149 = build_function_type_list (V8QI_type_node,
13150 V8QI_type_node, V8QI_type_node, NULL_TREE);
13151 tree v4hi_ftype_v4hi_v4hi
13152 = build_function_type_list (V4HI_type_node,
13153 V4HI_type_node, V4HI_type_node, NULL_TREE);
13154 tree v2si_ftype_v2si_v2si
13155 = build_function_type_list (V2SI_type_node,
13156 V2SI_type_node, V2SI_type_node, NULL_TREE);
13157 tree di_ftype_di_di
13158 = build_function_type_list (long_long_unsigned_type_node,
13159 long_long_unsigned_type_node,
13160 long_long_unsigned_type_node, NULL_TREE);
13162 tree v2si_ftype_v2sf
13163 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13164 tree v2sf_ftype_v2si
13165 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13166 tree v2si_ftype_v2si
13167 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13168 tree v2sf_ftype_v2sf
13169 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13170 tree v2sf_ftype_v2sf_v2sf
13171 = build_function_type_list (V2SF_type_node,
13172 V2SF_type_node, V2SF_type_node, NULL_TREE);
13173 tree v2si_ftype_v2sf_v2sf
13174 = build_function_type_list (V2SI_type_node,
13175 V2SF_type_node, V2SF_type_node, NULL_TREE);
13176 tree pint_type_node = build_pointer_type (integer_type_node);
13177 tree pcint_type_node = build_pointer_type (
13178 build_type_variant (integer_type_node, 1, 0));
13179 tree pdouble_type_node = build_pointer_type (double_type_node);
13180 tree pcdouble_type_node = build_pointer_type (
13181 build_type_variant (double_type_node, 1, 0));
13182 tree int_ftype_v2df_v2df
13183 = build_function_type_list (integer_type_node,
13184 V2DF_type_node, V2DF_type_node, NULL_TREE);
13186 tree ti_ftype_void
13187 = build_function_type (intTI_type_node, void_list_node);
13188 tree v2di_ftype_void
13189 = build_function_type (V2DI_type_node, void_list_node);
13190 tree ti_ftype_ti_ti
13191 = build_function_type_list (intTI_type_node,
13192 intTI_type_node, intTI_type_node, NULL_TREE);
13193 tree void_ftype_pcvoid
13194 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13195 tree v2di_ftype_di
13196 = build_function_type_list (V2DI_type_node,
13197 long_long_unsigned_type_node, NULL_TREE);
13198 tree di_ftype_v2di
13199 = build_function_type_list (long_long_unsigned_type_node,
13200 V2DI_type_node, NULL_TREE);
13201 tree v4sf_ftype_v4si
13202 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13203 tree v4si_ftype_v4sf
13204 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13205 tree v2df_ftype_v4si
13206 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13207 tree v4si_ftype_v2df
13208 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13209 tree v2si_ftype_v2df
13210 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13211 tree v4sf_ftype_v2df
13212 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13213 tree v2df_ftype_v2si
13214 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13215 tree v2df_ftype_v4sf
13216 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13217 tree int_ftype_v2df
13218 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13219 tree int64_ftype_v2df
13220 = build_function_type_list (long_long_integer_type_node,
13221 V2DF_type_node, NULL_TREE);
13222 tree v2df_ftype_v2df_int
13223 = build_function_type_list (V2DF_type_node,
13224 V2DF_type_node, integer_type_node, NULL_TREE);
13225 tree v2df_ftype_v2df_int64
13226 = build_function_type_list (V2DF_type_node,
13227 V2DF_type_node, long_long_integer_type_node,
13228 NULL_TREE);
13229 tree v4sf_ftype_v4sf_v2df
13230 = build_function_type_list (V4SF_type_node,
13231 V4SF_type_node, V2DF_type_node, NULL_TREE);
13232 tree v2df_ftype_v2df_v4sf
13233 = build_function_type_list (V2DF_type_node,
13234 V2DF_type_node, V4SF_type_node, NULL_TREE);
13235 tree v2df_ftype_v2df_v2df_int
13236 = build_function_type_list (V2DF_type_node,
13237 V2DF_type_node, V2DF_type_node,
13238 integer_type_node,
13239 NULL_TREE);
13240 tree v2df_ftype_v2df_pv2si
13241 = build_function_type_list (V2DF_type_node,
13242 V2DF_type_node, pv2si_type_node, NULL_TREE);
13243 tree void_ftype_pv2si_v2df
13244 = build_function_type_list (void_type_node,
13245 pv2si_type_node, V2DF_type_node, NULL_TREE);
13246 tree void_ftype_pdouble_v2df
13247 = build_function_type_list (void_type_node,
13248 pdouble_type_node, V2DF_type_node, NULL_TREE);
13249 tree void_ftype_pint_int
13250 = build_function_type_list (void_type_node,
13251 pint_type_node, integer_type_node, NULL_TREE);
13252 tree void_ftype_v16qi_v16qi_pchar
13253 = build_function_type_list (void_type_node,
13254 V16QI_type_node, V16QI_type_node,
13255 pchar_type_node, NULL_TREE);
13256 tree v2df_ftype_pcdouble
13257 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13258 tree v2df_ftype_v2df_v2df
13259 = build_function_type_list (V2DF_type_node,
13260 V2DF_type_node, V2DF_type_node, NULL_TREE);
13261 tree v16qi_ftype_v16qi_v16qi
13262 = build_function_type_list (V16QI_type_node,
13263 V16QI_type_node, V16QI_type_node, NULL_TREE);
13264 tree v8hi_ftype_v8hi_v8hi
13265 = build_function_type_list (V8HI_type_node,
13266 V8HI_type_node, V8HI_type_node, NULL_TREE);
13267 tree v4si_ftype_v4si_v4si
13268 = build_function_type_list (V4SI_type_node,
13269 V4SI_type_node, V4SI_type_node, NULL_TREE);
13270 tree v2di_ftype_v2di_v2di
13271 = build_function_type_list (V2DI_type_node,
13272 V2DI_type_node, V2DI_type_node, NULL_TREE);
13273 tree v2di_ftype_v2df_v2df
13274 = build_function_type_list (V2DI_type_node,
13275 V2DF_type_node, V2DF_type_node, NULL_TREE);
13276 tree v2df_ftype_v2df
13277 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13278 tree v2df_ftype_double
13279 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13280 tree v2df_ftype_double_double
13281 = build_function_type_list (V2DF_type_node,
13282 double_type_node, double_type_node, NULL_TREE);
13283 tree int_ftype_v8hi_int
13284 = build_function_type_list (integer_type_node,
13285 V8HI_type_node, integer_type_node, NULL_TREE);
13286 tree v8hi_ftype_v8hi_int_int
13287 = build_function_type_list (V8HI_type_node,
13288 V8HI_type_node, integer_type_node,
13289 integer_type_node, NULL_TREE);
13290 tree v2di_ftype_v2di_int
13291 = build_function_type_list (V2DI_type_node,
13292 V2DI_type_node, integer_type_node, NULL_TREE);
13293 tree v4si_ftype_v4si_int
13294 = build_function_type_list (V4SI_type_node,
13295 V4SI_type_node, integer_type_node, NULL_TREE);
13296 tree v8hi_ftype_v8hi_int
13297 = build_function_type_list (V8HI_type_node,
13298 V8HI_type_node, integer_type_node, NULL_TREE);
13299 tree v8hi_ftype_v8hi_v2di
13300 = build_function_type_list (V8HI_type_node,
13301 V8HI_type_node, V2DI_type_node, NULL_TREE);
13302 tree v4si_ftype_v4si_v2di
13303 = build_function_type_list (V4SI_type_node,
13304 V4SI_type_node, V2DI_type_node, NULL_TREE);
13305 tree v4si_ftype_v8hi_v8hi
13306 = build_function_type_list (V4SI_type_node,
13307 V8HI_type_node, V8HI_type_node, NULL_TREE);
13308 tree di_ftype_v8qi_v8qi
13309 = build_function_type_list (long_long_unsigned_type_node,
13310 V8QI_type_node, V8QI_type_node, NULL_TREE);
13311 tree di_ftype_v2si_v2si
13312 = build_function_type_list (long_long_unsigned_type_node,
13313 V2SI_type_node, V2SI_type_node, NULL_TREE);
13314 tree v2di_ftype_v16qi_v16qi
13315 = build_function_type_list (V2DI_type_node,
13316 V16QI_type_node, V16QI_type_node, NULL_TREE);
13317 tree v2di_ftype_v4si_v4si
13318 = build_function_type_list (V2DI_type_node,
13319 V4SI_type_node, V4SI_type_node, NULL_TREE);
13320 tree int_ftype_v16qi
13321 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13322 tree v16qi_ftype_pcchar
13323 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13324 tree void_ftype_pchar_v16qi
13325 = build_function_type_list (void_type_node,
13326 pchar_type_node, V16QI_type_node, NULL_TREE);
13327 tree v4si_ftype_pcint
13328 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13329 tree void_ftype_pcint_v4si
13330 = build_function_type_list (void_type_node,
13331 pcint_type_node, V4SI_type_node, NULL_TREE);
13332 tree v2di_ftype_v2di
13333 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13335 tree float80_type;
13336 tree float128_type;
13338 /* The __float80 type. */
13339 if (TYPE_MODE (long_double_type_node) == XFmode)
13340 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13341 "__float80");
13342 else
13344 /* The __float80 type. */
13345 float80_type = make_node (REAL_TYPE);
13346 TYPE_PRECISION (float80_type) = 96;
13347 layout_type (float80_type);
13348 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13351 float128_type = make_node (REAL_TYPE);
13352 TYPE_PRECISION (float128_type) = 128;
13353 layout_type (float128_type);
13354 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13356 /* Add all builtins that are more or less simple operations on two
13357 operands. */
13358 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13360 /* Use one of the operands; the target can have a different mode for
13361 mask-generating compares. */
13362 enum machine_mode mode;
13363 tree type;
13365 if (d->name == 0)
13366 continue;
13367 mode = insn_data[d->icode].operand[1].mode;
13369 switch (mode)
13371 case V16QImode:
13372 type = v16qi_ftype_v16qi_v16qi;
13373 break;
13374 case V8HImode:
13375 type = v8hi_ftype_v8hi_v8hi;
13376 break;
13377 case V4SImode:
13378 type = v4si_ftype_v4si_v4si;
13379 break;
13380 case V2DImode:
13381 type = v2di_ftype_v2di_v2di;
13382 break;
13383 case V2DFmode:
13384 type = v2df_ftype_v2df_v2df;
13385 break;
13386 case TImode:
13387 type = ti_ftype_ti_ti;
13388 break;
13389 case V4SFmode:
13390 type = v4sf_ftype_v4sf_v4sf;
13391 break;
13392 case V8QImode:
13393 type = v8qi_ftype_v8qi_v8qi;
13394 break;
13395 case V4HImode:
13396 type = v4hi_ftype_v4hi_v4hi;
13397 break;
13398 case V2SImode:
13399 type = v2si_ftype_v2si_v2si;
13400 break;
13401 case DImode:
13402 type = di_ftype_di_di;
13403 break;
13405 default:
13406 abort ();
13409 /* Override for comparisons. */
13410 if (d->icode == CODE_FOR_maskcmpv4sf3
13411 || d->icode == CODE_FOR_maskncmpv4sf3
13412 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13413 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13414 type = v4si_ftype_v4sf_v4sf;
13416 if (d->icode == CODE_FOR_maskcmpv2df3
13417 || d->icode == CODE_FOR_maskncmpv2df3
13418 || d->icode == CODE_FOR_vmmaskcmpv2df3
13419 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13420 type = v2di_ftype_v2df_v2df;
13422 def_builtin (d->mask, d->name, type, d->code);
13425 /* Add the remaining MMX insns with somewhat more complicated types. */
13426 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13427 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13428 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13429 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13430 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13432 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13433 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13434 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13436 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13437 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13439 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13440 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13442 /* comi/ucomi insns. */
13443 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13444 if (d->mask == MASK_SSE2)
13445 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13446 else
13447 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13449 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13450 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13451 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13453 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13454 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13455 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13456 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13457 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13458 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13459 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13460 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13461 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13462 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13463 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13465 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13466 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13468 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13470 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13471 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13472 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13473 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13474 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13475 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13477 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13478 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13479 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13480 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13482 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13483 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13484 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13485 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13487 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13489 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13491 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13492 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13493 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13494 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13495 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13496 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13498 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13500 /* Original 3DNow! */
13501 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13502 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13503 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13515 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13516 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13517 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13518 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13519 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13520 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13522 /* 3DNow! extension as used in the Athlon CPU. */
13523 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13524 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13525 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13526 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13527 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13528 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13530 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13532 /* SSE2 */
13533 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13552 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13581 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13582 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13589 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13613 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13644 /* Prescott New Instructions. */
13645 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13646 void_ftype_pcvoid_unsigned_unsigned,
13647 IX86_BUILTIN_MONITOR);
13648 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13649 void_ftype_unsigned_unsigned,
13650 IX86_BUILTIN_MWAIT);
13651 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13652 v4sf_ftype_v4sf,
13653 IX86_BUILTIN_MOVSHDUP);
13654 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13655 v4sf_ftype_v4sf,
13656 IX86_BUILTIN_MOVSLDUP);
13657 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13658 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13659 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13660 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13661 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13662 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13665 /* Errors in the source file can cause expand_expr to return const0_rtx
13666 where we expect a vector. To avoid crashing, use one of the vector
13667 clear instructions. */
13668 static rtx
13669 safe_vector_operand (rtx x, enum machine_mode mode)
13671 if (x != const0_rtx)
13672 return x;
13673 x = gen_reg_rtx (mode);
13675 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13676 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13677 : gen_rtx_SUBREG (DImode, x, 0)));
13678 else
13679 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13680 : gen_rtx_SUBREG (V4SFmode, x, 0),
13681 CONST0_RTX (V4SFmode)));
13682 return x;
13685 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13687 static rtx
13688 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13690 rtx pat;
13691 tree arg0 = TREE_VALUE (arglist);
13692 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13693 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13694 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13695 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13696 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13697 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13699 if (VECTOR_MODE_P (mode0))
13700 op0 = safe_vector_operand (op0, mode0);
13701 if (VECTOR_MODE_P (mode1))
13702 op1 = safe_vector_operand (op1, mode1);
13704 if (! target
13705 || GET_MODE (target) != tmode
13706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13707 target = gen_reg_rtx (tmode);
13709 if (GET_MODE (op1) == SImode && mode1 == TImode)
13711 rtx x = gen_reg_rtx (V4SImode);
13712 emit_insn (gen_sse2_loadd (x, op1));
13713 op1 = gen_lowpart (TImode, x);
13716 /* In case the insn wants input operands in modes different from
13717 the result, abort. */
13718 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13719 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13720 abort ();
13722 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13723 op0 = copy_to_mode_reg (mode0, op0);
13724 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13725 op1 = copy_to_mode_reg (mode1, op1);
13727 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13728 yet one of the two must not be a memory. This is normally enforced
13729 by expanders, but we didn't bother to create one here. */
13730 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13731 op0 = copy_to_mode_reg (mode0, op0);
13733 pat = GEN_FCN (icode) (target, op0, op1);
13734 if (! pat)
13735 return 0;
13736 emit_insn (pat);
13737 return target;
13740 /* Subroutine of ix86_expand_builtin to take care of stores. */
13742 static rtx
13743 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13745 rtx pat;
13746 tree arg0 = TREE_VALUE (arglist);
13747 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13748 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13749 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13750 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13751 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13753 if (VECTOR_MODE_P (mode1))
13754 op1 = safe_vector_operand (op1, mode1);
13756 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13757 op1 = copy_to_mode_reg (mode1, op1);
13759 pat = GEN_FCN (icode) (op0, op1);
13760 if (pat)
13761 emit_insn (pat);
13762 return 0;
13765 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13767 static rtx
13768 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13769 rtx target, int do_load)
13771 rtx pat;
13772 tree arg0 = TREE_VALUE (arglist);
13773 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13774 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13775 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13777 if (! target
13778 || GET_MODE (target) != tmode
13779 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13780 target = gen_reg_rtx (tmode);
13781 if (do_load)
13782 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13783 else
13785 if (VECTOR_MODE_P (mode0))
13786 op0 = safe_vector_operand (op0, mode0);
13788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13789 op0 = copy_to_mode_reg (mode0, op0);
13792 pat = GEN_FCN (icode) (target, op0);
13793 if (! pat)
13794 return 0;
13795 emit_insn (pat);
13796 return target;
13799 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13800 sqrtss, rsqrtss, rcpss. */
13802 static rtx
13803 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13805 rtx pat;
13806 tree arg0 = TREE_VALUE (arglist);
13807 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13808 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13809 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13811 if (! target
13812 || GET_MODE (target) != tmode
13813 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13814 target = gen_reg_rtx (tmode);
13816 if (VECTOR_MODE_P (mode0))
13817 op0 = safe_vector_operand (op0, mode0);
13819 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13820 op0 = copy_to_mode_reg (mode0, op0);
13822 op1 = op0;
13823 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13824 op1 = copy_to_mode_reg (mode0, op1);
13826 pat = GEN_FCN (icode) (target, op0, op1);
13827 if (! pat)
13828 return 0;
13829 emit_insn (pat);
13830 return target;
13833 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13835 static rtx
13836 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13837 rtx target)
13839 rtx pat;
13840 tree arg0 = TREE_VALUE (arglist);
13841 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13842 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13843 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13844 rtx op2;
13845 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13846 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13847 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13848 enum rtx_code comparison = d->comparison;
13850 if (VECTOR_MODE_P (mode0))
13851 op0 = safe_vector_operand (op0, mode0);
13852 if (VECTOR_MODE_P (mode1))
13853 op1 = safe_vector_operand (op1, mode1);
13855 /* Swap operands if we have a comparison that isn't available in
13856 hardware. */
13857 if (d->flag)
13859 rtx tmp = gen_reg_rtx (mode1);
13860 emit_move_insn (tmp, op1);
13861 op1 = op0;
13862 op0 = tmp;
13865 if (! target
13866 || GET_MODE (target) != tmode
13867 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13868 target = gen_reg_rtx (tmode);
13870 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13871 op0 = copy_to_mode_reg (mode0, op0);
13872 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13873 op1 = copy_to_mode_reg (mode1, op1);
13875 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13876 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13877 if (! pat)
13878 return 0;
13879 emit_insn (pat);
13880 return target;
13883 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13885 static rtx
13886 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13887 rtx target)
13889 rtx pat;
13890 tree arg0 = TREE_VALUE (arglist);
13891 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13892 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13893 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13894 rtx op2;
13895 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13896 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13897 enum rtx_code comparison = d->comparison;
13899 if (VECTOR_MODE_P (mode0))
13900 op0 = safe_vector_operand (op0, mode0);
13901 if (VECTOR_MODE_P (mode1))
13902 op1 = safe_vector_operand (op1, mode1);
13904 /* Swap operands if we have a comparison that isn't available in
13905 hardware. */
13906 if (d->flag)
13908 rtx tmp = op1;
13909 op1 = op0;
13910 op0 = tmp;
13913 target = gen_reg_rtx (SImode);
13914 emit_move_insn (target, const0_rtx);
13915 target = gen_rtx_SUBREG (QImode, target, 0);
13917 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13918 op0 = copy_to_mode_reg (mode0, op0);
13919 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13920 op1 = copy_to_mode_reg (mode1, op1);
13922 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13923 pat = GEN_FCN (d->icode) (op0, op1);
13924 if (! pat)
13925 return 0;
13926 emit_insn (pat);
13927 emit_insn (gen_rtx_SET (VOIDmode,
13928 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13929 gen_rtx_fmt_ee (comparison, QImode,
13930 SET_DEST (pat),
13931 const0_rtx)));
13933 return SUBREG_REG (target);
13936 /* Expand an expression EXP that calls a built-in function,
13937 with result going to TARGET if that's convenient
13938 (and in mode MODE if that's convenient).
13939 SUBTARGET may be used as the target for computing one of EXP's operands.
13940 IGNORE is nonzero if the value is to be ignored. */
13943 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13944 enum machine_mode mode ATTRIBUTE_UNUSED,
13945 int ignore ATTRIBUTE_UNUSED)
13947 const struct builtin_description *d;
13948 size_t i;
13949 enum insn_code icode;
13950 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13951 tree arglist = TREE_OPERAND (exp, 1);
13952 tree arg0, arg1, arg2;
13953 rtx op0, op1, op2, pat;
13954 enum machine_mode tmode, mode0, mode1, mode2;
13955 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13957 switch (fcode)
13959 case IX86_BUILTIN_EMMS:
13960 emit_insn (gen_emms ());
13961 return 0;
13963 case IX86_BUILTIN_SFENCE:
13964 emit_insn (gen_sfence ());
13965 return 0;
13967 case IX86_BUILTIN_PEXTRW:
13968 case IX86_BUILTIN_PEXTRW128:
13969 icode = (fcode == IX86_BUILTIN_PEXTRW
13970 ? CODE_FOR_mmx_pextrw
13971 : CODE_FOR_sse2_pextrw);
13972 arg0 = TREE_VALUE (arglist);
13973 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13974 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13975 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13976 tmode = insn_data[icode].operand[0].mode;
13977 mode0 = insn_data[icode].operand[1].mode;
13978 mode1 = insn_data[icode].operand[2].mode;
13980 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13981 op0 = copy_to_mode_reg (mode0, op0);
13982 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13984 error ("selector must be an integer constant in the range 0..%i",
13985 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13986 return gen_reg_rtx (tmode);
13988 if (target == 0
13989 || GET_MODE (target) != tmode
13990 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13991 target = gen_reg_rtx (tmode);
13992 pat = GEN_FCN (icode) (target, op0, op1);
13993 if (! pat)
13994 return 0;
13995 emit_insn (pat);
13996 return target;
13998 case IX86_BUILTIN_PINSRW:
13999 case IX86_BUILTIN_PINSRW128:
14000 icode = (fcode == IX86_BUILTIN_PINSRW
14001 ? CODE_FOR_mmx_pinsrw
14002 : CODE_FOR_sse2_pinsrw);
14003 arg0 = TREE_VALUE (arglist);
14004 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14005 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14006 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14007 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14008 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14009 tmode = insn_data[icode].operand[0].mode;
14010 mode0 = insn_data[icode].operand[1].mode;
14011 mode1 = insn_data[icode].operand[2].mode;
14012 mode2 = insn_data[icode].operand[3].mode;
14014 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14015 op0 = copy_to_mode_reg (mode0, op0);
14016 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14017 op1 = copy_to_mode_reg (mode1, op1);
14018 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14020 error ("selector must be an integer constant in the range 0..%i",
14021 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14022 return const0_rtx;
14024 if (target == 0
14025 || GET_MODE (target) != tmode
14026 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14027 target = gen_reg_rtx (tmode);
14028 pat = GEN_FCN (icode) (target, op0, op1, op2);
14029 if (! pat)
14030 return 0;
14031 emit_insn (pat);
14032 return target;
14034 case IX86_BUILTIN_MASKMOVQ:
14035 case IX86_BUILTIN_MASKMOVDQU:
14036 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14037 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14038 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14039 : CODE_FOR_sse2_maskmovdqu));
14040 /* Note the arg order is different from the operand order. */
14041 arg1 = TREE_VALUE (arglist);
14042 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14043 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14044 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14045 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14046 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14047 mode0 = insn_data[icode].operand[0].mode;
14048 mode1 = insn_data[icode].operand[1].mode;
14049 mode2 = insn_data[icode].operand[2].mode;
14051 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14052 op0 = copy_to_mode_reg (mode0, op0);
14053 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14054 op1 = copy_to_mode_reg (mode1, op1);
14055 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14056 op2 = copy_to_mode_reg (mode2, op2);
14057 pat = GEN_FCN (icode) (op0, op1, op2);
14058 if (! pat)
14059 return 0;
14060 emit_insn (pat);
14061 return 0;
14063 case IX86_BUILTIN_SQRTSS:
14064 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14065 case IX86_BUILTIN_RSQRTSS:
14066 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14067 case IX86_BUILTIN_RCPSS:
14068 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14070 case IX86_BUILTIN_LOADAPS:
14071 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14073 case IX86_BUILTIN_LOADUPS:
14074 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14076 case IX86_BUILTIN_STOREAPS:
14077 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14079 case IX86_BUILTIN_STOREUPS:
14080 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14082 case IX86_BUILTIN_LOADSS:
14083 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14085 case IX86_BUILTIN_STORESS:
14086 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14088 case IX86_BUILTIN_LOADHPS:
14089 case IX86_BUILTIN_LOADLPS:
14090 case IX86_BUILTIN_LOADHPD:
14091 case IX86_BUILTIN_LOADLPD:
14092 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14093 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14094 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14095 : CODE_FOR_sse2_movsd);
14096 arg0 = TREE_VALUE (arglist);
14097 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14098 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14099 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14100 tmode = insn_data[icode].operand[0].mode;
14101 mode0 = insn_data[icode].operand[1].mode;
14102 mode1 = insn_data[icode].operand[2].mode;
14104 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14105 op0 = copy_to_mode_reg (mode0, op0);
14106 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14107 if (target == 0
14108 || GET_MODE (target) != tmode
14109 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14110 target = gen_reg_rtx (tmode);
14111 pat = GEN_FCN (icode) (target, op0, op1);
14112 if (! pat)
14113 return 0;
14114 emit_insn (pat);
14115 return target;
14117 case IX86_BUILTIN_STOREHPS:
14118 case IX86_BUILTIN_STORELPS:
14119 case IX86_BUILTIN_STOREHPD:
14120 case IX86_BUILTIN_STORELPD:
14121 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14122 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14123 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14124 : CODE_FOR_sse2_movsd);
14125 arg0 = TREE_VALUE (arglist);
14126 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14127 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14128 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14129 mode0 = insn_data[icode].operand[1].mode;
14130 mode1 = insn_data[icode].operand[2].mode;
14132 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14133 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14134 op1 = copy_to_mode_reg (mode1, op1);
14136 pat = GEN_FCN (icode) (op0, op0, op1);
14137 if (! pat)
14138 return 0;
14139 emit_insn (pat);
14140 return 0;
14142 case IX86_BUILTIN_MOVNTPS:
14143 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14144 case IX86_BUILTIN_MOVNTQ:
14145 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14147 case IX86_BUILTIN_LDMXCSR:
14148 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14149 target = assign_386_stack_local (SImode, 0);
14150 emit_move_insn (target, op0);
14151 emit_insn (gen_ldmxcsr (target));
14152 return 0;
14154 case IX86_BUILTIN_STMXCSR:
14155 target = assign_386_stack_local (SImode, 0);
14156 emit_insn (gen_stmxcsr (target));
14157 return copy_to_mode_reg (SImode, target);
14159 case IX86_BUILTIN_SHUFPS:
14160 case IX86_BUILTIN_SHUFPD:
14161 icode = (fcode == IX86_BUILTIN_SHUFPS
14162 ? CODE_FOR_sse_shufps
14163 : CODE_FOR_sse2_shufpd);
14164 arg0 = TREE_VALUE (arglist);
14165 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14166 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14167 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14168 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14169 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14170 tmode = insn_data[icode].operand[0].mode;
14171 mode0 = insn_data[icode].operand[1].mode;
14172 mode1 = insn_data[icode].operand[2].mode;
14173 mode2 = insn_data[icode].operand[3].mode;
14175 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14176 op0 = copy_to_mode_reg (mode0, op0);
14177 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14178 op1 = copy_to_mode_reg (mode1, op1);
14179 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14181 /* @@@ better error message */
14182 error ("mask must be an immediate");
14183 return gen_reg_rtx (tmode);
14185 if (target == 0
14186 || GET_MODE (target) != tmode
14187 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14188 target = gen_reg_rtx (tmode);
14189 pat = GEN_FCN (icode) (target, op0, op1, op2);
14190 if (! pat)
14191 return 0;
14192 emit_insn (pat);
14193 return target;
14195 case IX86_BUILTIN_PSHUFW:
14196 case IX86_BUILTIN_PSHUFD:
14197 case IX86_BUILTIN_PSHUFHW:
14198 case IX86_BUILTIN_PSHUFLW:
14199 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14200 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14201 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14202 : CODE_FOR_mmx_pshufw);
14203 arg0 = TREE_VALUE (arglist);
14204 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14205 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14206 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14207 tmode = insn_data[icode].operand[0].mode;
14208 mode1 = insn_data[icode].operand[1].mode;
14209 mode2 = insn_data[icode].operand[2].mode;
14211 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14212 op0 = copy_to_mode_reg (mode1, op0);
14213 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14215 /* @@@ better error message */
14216 error ("mask must be an immediate");
14217 return const0_rtx;
14219 if (target == 0
14220 || GET_MODE (target) != tmode
14221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14222 target = gen_reg_rtx (tmode);
14223 pat = GEN_FCN (icode) (target, op0, op1);
14224 if (! pat)
14225 return 0;
14226 emit_insn (pat);
14227 return target;
14229 case IX86_BUILTIN_PSLLDQI128:
14230 case IX86_BUILTIN_PSRLDQI128:
14231 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14232 : CODE_FOR_sse2_lshrti3);
14233 arg0 = TREE_VALUE (arglist);
14234 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14235 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14236 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14237 tmode = insn_data[icode].operand[0].mode;
14238 mode1 = insn_data[icode].operand[1].mode;
14239 mode2 = insn_data[icode].operand[2].mode;
14241 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14243 op0 = copy_to_reg (op0);
14244 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14246 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14248 error ("shift must be an immediate");
14249 return const0_rtx;
14251 target = gen_reg_rtx (V2DImode);
14252 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14253 if (! pat)
14254 return 0;
14255 emit_insn (pat);
14256 return target;
14258 case IX86_BUILTIN_FEMMS:
14259 emit_insn (gen_femms ());
14260 return NULL_RTX;
14262 case IX86_BUILTIN_PAVGUSB:
14263 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14265 case IX86_BUILTIN_PF2ID:
14266 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14268 case IX86_BUILTIN_PFACC:
14269 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14271 case IX86_BUILTIN_PFADD:
14272 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14274 case IX86_BUILTIN_PFCMPEQ:
14275 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14277 case IX86_BUILTIN_PFCMPGE:
14278 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14280 case IX86_BUILTIN_PFCMPGT:
14281 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14283 case IX86_BUILTIN_PFMAX:
14284 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14286 case IX86_BUILTIN_PFMIN:
14287 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14289 case IX86_BUILTIN_PFMUL:
14290 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14292 case IX86_BUILTIN_PFRCP:
14293 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14295 case IX86_BUILTIN_PFRCPIT1:
14296 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14298 case IX86_BUILTIN_PFRCPIT2:
14299 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14301 case IX86_BUILTIN_PFRSQIT1:
14302 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14304 case IX86_BUILTIN_PFRSQRT:
14305 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14307 case IX86_BUILTIN_PFSUB:
14308 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14310 case IX86_BUILTIN_PFSUBR:
14311 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14313 case IX86_BUILTIN_PI2FD:
14314 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14316 case IX86_BUILTIN_PMULHRW:
14317 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14319 case IX86_BUILTIN_PF2IW:
14320 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14322 case IX86_BUILTIN_PFNACC:
14323 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14325 case IX86_BUILTIN_PFPNACC:
14326 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14328 case IX86_BUILTIN_PI2FW:
14329 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14331 case IX86_BUILTIN_PSWAPDSI:
14332 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14334 case IX86_BUILTIN_PSWAPDSF:
14335 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14337 case IX86_BUILTIN_SSE_ZERO:
14338 target = gen_reg_rtx (V4SFmode);
14339 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14340 return target;
14342 case IX86_BUILTIN_MMX_ZERO:
14343 target = gen_reg_rtx (DImode);
14344 emit_insn (gen_mmx_clrdi (target));
14345 return target;
14347 case IX86_BUILTIN_CLRTI:
14348 target = gen_reg_rtx (V2DImode);
14349 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14350 return target;
14353 case IX86_BUILTIN_SQRTSD:
14354 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14355 case IX86_BUILTIN_LOADAPD:
14356 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14357 case IX86_BUILTIN_LOADUPD:
14358 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14360 case IX86_BUILTIN_STOREAPD:
14361 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14362 case IX86_BUILTIN_STOREUPD:
14363 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14365 case IX86_BUILTIN_LOADSD:
14366 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14368 case IX86_BUILTIN_STORESD:
14369 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14371 case IX86_BUILTIN_SETPD1:
14372 target = assign_386_stack_local (DFmode, 0);
14373 arg0 = TREE_VALUE (arglist);
14374 emit_move_insn (adjust_address (target, DFmode, 0),
14375 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14376 op0 = gen_reg_rtx (V2DFmode);
14377 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14378 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14379 return op0;
14381 case IX86_BUILTIN_SETPD:
14382 target = assign_386_stack_local (V2DFmode, 0);
14383 arg0 = TREE_VALUE (arglist);
14384 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14385 emit_move_insn (adjust_address (target, DFmode, 0),
14386 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14387 emit_move_insn (adjust_address (target, DFmode, 8),
14388 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14389 op0 = gen_reg_rtx (V2DFmode);
14390 emit_insn (gen_sse2_movapd (op0, target));
14391 return op0;
14393 case IX86_BUILTIN_LOADRPD:
14394 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14395 gen_reg_rtx (V2DFmode), 1);
14396 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14397 return target;
14399 case IX86_BUILTIN_LOADPD1:
14400 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14401 gen_reg_rtx (V2DFmode), 1);
14402 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14403 return target;
14405 case IX86_BUILTIN_STOREPD1:
14406 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14407 case IX86_BUILTIN_STORERPD:
14408 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14410 case IX86_BUILTIN_CLRPD:
14411 target = gen_reg_rtx (V2DFmode);
14412 emit_insn (gen_sse_clrv2df (target));
14413 return target;
14415 case IX86_BUILTIN_MFENCE:
14416 emit_insn (gen_sse2_mfence ());
14417 return 0;
14418 case IX86_BUILTIN_LFENCE:
14419 emit_insn (gen_sse2_lfence ());
14420 return 0;
14422 case IX86_BUILTIN_CLFLUSH:
14423 arg0 = TREE_VALUE (arglist);
14424 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14425 icode = CODE_FOR_sse2_clflush;
14426 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14427 op0 = copy_to_mode_reg (Pmode, op0);
14429 emit_insn (gen_sse2_clflush (op0));
14430 return 0;
14432 case IX86_BUILTIN_MOVNTPD:
14433 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14434 case IX86_BUILTIN_MOVNTDQ:
14435 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14436 case IX86_BUILTIN_MOVNTI:
14437 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14439 case IX86_BUILTIN_LOADDQA:
14440 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14441 case IX86_BUILTIN_LOADDQU:
14442 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14443 case IX86_BUILTIN_LOADD:
14444 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14446 case IX86_BUILTIN_STOREDQA:
14447 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14448 case IX86_BUILTIN_STOREDQU:
14449 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14450 case IX86_BUILTIN_STORED:
14451 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14453 case IX86_BUILTIN_MONITOR:
14454 arg0 = TREE_VALUE (arglist);
14455 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14456 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14457 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14458 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14459 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14460 if (!REG_P (op0))
14461 op0 = copy_to_mode_reg (SImode, op0);
14462 if (!REG_P (op1))
14463 op1 = copy_to_mode_reg (SImode, op1);
14464 if (!REG_P (op2))
14465 op2 = copy_to_mode_reg (SImode, op2);
14466 emit_insn (gen_monitor (op0, op1, op2));
14467 return 0;
14469 case IX86_BUILTIN_MWAIT:
14470 arg0 = TREE_VALUE (arglist);
14471 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14472 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14473 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14474 if (!REG_P (op0))
14475 op0 = copy_to_mode_reg (SImode, op0);
14476 if (!REG_P (op1))
14477 op1 = copy_to_mode_reg (SImode, op1);
14478 emit_insn (gen_mwait (op0, op1));
14479 return 0;
14481 case IX86_BUILTIN_LOADDDUP:
14482 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14484 case IX86_BUILTIN_LDDQU:
14485 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14488 default:
14489 break;
14492 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14493 if (d->code == fcode)
14495 /* Compares are treated specially. */
14496 if (d->icode == CODE_FOR_maskcmpv4sf3
14497 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14498 || d->icode == CODE_FOR_maskncmpv4sf3
14499 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14500 || d->icode == CODE_FOR_maskcmpv2df3
14501 || d->icode == CODE_FOR_vmmaskcmpv2df3
14502 || d->icode == CODE_FOR_maskncmpv2df3
14503 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14504 return ix86_expand_sse_compare (d, arglist, target);
14506 return ix86_expand_binop_builtin (d->icode, arglist, target);
14509 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14510 if (d->code == fcode)
14511 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14513 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14514 if (d->code == fcode)
14515 return ix86_expand_sse_comi (d, arglist, target);
14517 /* @@@ Should really do something sensible here. */
14518 return 0;
14521 /* Store OPERAND to the memory after reload is completed. This means
14522 that we can't easily use assign_stack_local. */
14524 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14526 rtx result;
14527 if (!reload_completed)
14528 abort ();
14529 if (TARGET_RED_ZONE)
14531 result = gen_rtx_MEM (mode,
14532 gen_rtx_PLUS (Pmode,
14533 stack_pointer_rtx,
14534 GEN_INT (-RED_ZONE_SIZE)));
14535 emit_move_insn (result, operand);
14537 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14539 switch (mode)
14541 case HImode:
14542 case SImode:
14543 operand = gen_lowpart (DImode, operand);
14544 /* FALLTHRU */
14545 case DImode:
14546 emit_insn (
14547 gen_rtx_SET (VOIDmode,
14548 gen_rtx_MEM (DImode,
14549 gen_rtx_PRE_DEC (DImode,
14550 stack_pointer_rtx)),
14551 operand));
14552 break;
14553 default:
14554 abort ();
14556 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14558 else
14560 switch (mode)
14562 case DImode:
14564 rtx operands[2];
14565 split_di (&operand, 1, operands, operands + 1);
14566 emit_insn (
14567 gen_rtx_SET (VOIDmode,
14568 gen_rtx_MEM (SImode,
14569 gen_rtx_PRE_DEC (Pmode,
14570 stack_pointer_rtx)),
14571 operands[1]));
14572 emit_insn (
14573 gen_rtx_SET (VOIDmode,
14574 gen_rtx_MEM (SImode,
14575 gen_rtx_PRE_DEC (Pmode,
14576 stack_pointer_rtx)),
14577 operands[0]));
14579 break;
14580 case HImode:
14581 /* It is better to store HImodes as SImodes. */
14582 if (!TARGET_PARTIAL_REG_STALL)
14583 operand = gen_lowpart (SImode, operand);
14584 /* FALLTHRU */
14585 case SImode:
14586 emit_insn (
14587 gen_rtx_SET (VOIDmode,
14588 gen_rtx_MEM (GET_MODE (operand),
14589 gen_rtx_PRE_DEC (SImode,
14590 stack_pointer_rtx)),
14591 operand));
14592 break;
14593 default:
14594 abort ();
14596 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14598 return result;
14601 /* Free operand from the memory. */
14602 void
14603 ix86_free_from_memory (enum machine_mode mode)
14605 if (!TARGET_RED_ZONE)
14607 int size;
14609 if (mode == DImode || TARGET_64BIT)
14610 size = 8;
14611 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14612 size = 2;
14613 else
14614 size = 4;
14615 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14616 to pop or add instruction if registers are available. */
14617 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14618 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14619 GEN_INT (size))));
14623 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14624 QImode must go into class Q_REGS.
14625 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14626 movdf to do mem-to-mem moves through integer regs. */
14627 enum reg_class
14628 ix86_preferred_reload_class (rtx x, enum reg_class class)
14630 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14631 return NO_REGS;
14632 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14634 /* SSE can't load any constant directly yet. */
14635 if (SSE_CLASS_P (class))
14636 return NO_REGS;
14637 /* Floats can load 0 and 1. */
14638 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14640 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14641 if (MAYBE_SSE_CLASS_P (class))
14642 return (reg_class_subset_p (class, GENERAL_REGS)
14643 ? GENERAL_REGS : FLOAT_REGS);
14644 else
14645 return class;
14647 /* General regs can load everything. */
14648 if (reg_class_subset_p (class, GENERAL_REGS))
14649 return GENERAL_REGS;
14650 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14651 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14652 return NO_REGS;
14654 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14655 return NO_REGS;
14656 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14657 return Q_REGS;
14658 return class;
14661 /* If we are copying between general and FP registers, we need a memory
14662 location. The same is true for SSE and MMX registers.
14664 The macro can't work reliably when one of the CLASSES is class containing
14665 registers from multiple units (SSE, MMX, integer). We avoid this by never
14666 combining those units in single alternative in the machine description.
14667 Ensure that this constraint holds to avoid unexpected surprises.
14669 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14670 enforce these sanity checks. */
14672 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14673 enum machine_mode mode, int strict)
14675 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14676 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14677 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14678 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14679 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14680 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14682 if (strict)
14683 abort ();
14684 else
14685 return 1;
14687 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14688 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14689 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14690 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14691 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14693 /* Return the cost of moving data from a register in class CLASS1 to
14694 one in class CLASS2.
14696 It is not required that the cost always equal 2 when FROM is the same as TO;
14697 on some machines it is expensive to move between registers if they are not
14698 general registers. */
14700 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14701 enum reg_class class2)
14703 /* In case we require secondary memory, compute cost of the store followed
14704 by load. In order to avoid bad register allocation choices, we need
14705 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14707 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14709 int cost = 1;
14711 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14712 MEMORY_MOVE_COST (mode, class1, 1));
14713 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14714 MEMORY_MOVE_COST (mode, class2, 1));
14716 /* In case of copying from general_purpose_register we may emit multiple
14717 stores followed by single load causing memory size mismatch stall.
14718 Count this as arbitrarily high cost of 20. */
14719 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14720 cost += 20;
14722 /* In the case of FP/MMX moves, the registers actually overlap, and we
14723 have to switch modes in order to treat them differently. */
14724 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14725 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14726 cost += 20;
14728 return cost;
14731 /* Moves between SSE/MMX and integer unit are expensive. */
14732 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14733 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14734 return ix86_cost->mmxsse_to_integer;
14735 if (MAYBE_FLOAT_CLASS_P (class1))
14736 return ix86_cost->fp_move;
14737 if (MAYBE_SSE_CLASS_P (class1))
14738 return ix86_cost->sse_move;
14739 if (MAYBE_MMX_CLASS_P (class1))
14740 return ix86_cost->mmx_move;
14741 return 2;
14744 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14746 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14748 /* Flags and only flags can only hold CCmode values. */
14749 if (CC_REGNO_P (regno))
14750 return GET_MODE_CLASS (mode) == MODE_CC;
14751 if (GET_MODE_CLASS (mode) == MODE_CC
14752 || GET_MODE_CLASS (mode) == MODE_RANDOM
14753 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14754 return 0;
14755 if (FP_REGNO_P (regno))
14756 return VALID_FP_MODE_P (mode);
14757 if (SSE_REGNO_P (regno))
14758 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14759 if (MMX_REGNO_P (regno))
14760 return (TARGET_MMX
14761 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14762 /* We handle both integer and floats in the general purpose registers.
14763 In future we should be able to handle vector modes as well. */
14764 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14765 return 0;
14766 /* Take care for QImode values - they can be in non-QI regs, but then
14767 they do cause partial register stalls. */
14768 if (regno < 4 || mode != QImode || TARGET_64BIT)
14769 return 1;
14770 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14773 /* Return the cost of moving data of mode M between a
14774 register and memory. A value of 2 is the default; this cost is
14775 relative to those in `REGISTER_MOVE_COST'.
14777 If moving between registers and memory is more expensive than
14778 between two registers, you should define this macro to express the
14779 relative cost.
14781 Model also increased moving costs of QImode registers in non
14782 Q_REGS classes.
14785 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14787 if (FLOAT_CLASS_P (class))
14789 int index;
14790 switch (mode)
14792 case SFmode:
14793 index = 0;
14794 break;
14795 case DFmode:
14796 index = 1;
14797 break;
14798 case XFmode:
14799 index = 2;
14800 break;
14801 default:
14802 return 100;
14804 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14806 if (SSE_CLASS_P (class))
14808 int index;
14809 switch (GET_MODE_SIZE (mode))
14811 case 4:
14812 index = 0;
14813 break;
14814 case 8:
14815 index = 1;
14816 break;
14817 case 16:
14818 index = 2;
14819 break;
14820 default:
14821 return 100;
14823 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14825 if (MMX_CLASS_P (class))
14827 int index;
14828 switch (GET_MODE_SIZE (mode))
14830 case 4:
14831 index = 0;
14832 break;
14833 case 8:
14834 index = 1;
14835 break;
14836 default:
14837 return 100;
14839 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14841 switch (GET_MODE_SIZE (mode))
14843 case 1:
14844 if (in)
14845 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14846 : ix86_cost->movzbl_load);
14847 else
14848 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14849 : ix86_cost->int_store[0] + 4);
14850 break;
14851 case 2:
14852 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14853 default:
14854 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14855 if (mode == TFmode)
14856 mode = XFmode;
14857 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14858 * (((int) GET_MODE_SIZE (mode)
14859 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14863 /* Compute a (partial) cost for rtx X. Return true if the complete
14864 cost has been computed, and false if subexpressions should be
14865 scanned. In either case, *TOTAL contains the cost result. */
14867 static bool
14868 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14870 enum machine_mode mode = GET_MODE (x);
14872 switch (code)
14874 case CONST_INT:
14875 case CONST:
14876 case LABEL_REF:
14877 case SYMBOL_REF:
14878 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14879 *total = 3;
14880 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14881 *total = 2;
14882 else if (flag_pic && SYMBOLIC_CONST (x)
14883 && (!TARGET_64BIT
14884 || (!GET_CODE (x) != LABEL_REF
14885 && (GET_CODE (x) != SYMBOL_REF
14886 || !SYMBOL_REF_LOCAL_P (x)))))
14887 *total = 1;
14888 else
14889 *total = 0;
14890 return true;
14892 case CONST_DOUBLE:
14893 if (mode == VOIDmode)
14894 *total = 0;
14895 else
14896 switch (standard_80387_constant_p (x))
14898 case 1: /* 0.0 */
14899 *total = 1;
14900 break;
14901 default: /* Other constants */
14902 *total = 2;
14903 break;
14904 case 0:
14905 case -1:
14906 /* Start with (MEM (SYMBOL_REF)), since that's where
14907 it'll probably end up. Add a penalty for size. */
14908 *total = (COSTS_N_INSNS (1)
14909 + (flag_pic != 0 && !TARGET_64BIT)
14910 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14911 break;
14913 return true;
14915 case ZERO_EXTEND:
14916 /* The zero extensions is often completely free on x86_64, so make
14917 it as cheap as possible. */
14918 if (TARGET_64BIT && mode == DImode
14919 && GET_MODE (XEXP (x, 0)) == SImode)
14920 *total = 1;
14921 else if (TARGET_ZERO_EXTEND_WITH_AND)
14922 *total = COSTS_N_INSNS (ix86_cost->add);
14923 else
14924 *total = COSTS_N_INSNS (ix86_cost->movzx);
14925 return false;
14927 case SIGN_EXTEND:
14928 *total = COSTS_N_INSNS (ix86_cost->movsx);
14929 return false;
14931 case ASHIFT:
14932 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14933 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14935 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14936 if (value == 1)
14938 *total = COSTS_N_INSNS (ix86_cost->add);
14939 return false;
14941 if ((value == 2 || value == 3)
14942 && ix86_cost->lea <= ix86_cost->shift_const)
14944 *total = COSTS_N_INSNS (ix86_cost->lea);
14945 return false;
14948 /* FALLTHRU */
14950 case ROTATE:
14951 case ASHIFTRT:
14952 case LSHIFTRT:
14953 case ROTATERT:
14954 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14956 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14958 if (INTVAL (XEXP (x, 1)) > 32)
14959 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14960 else
14961 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14963 else
14965 if (GET_CODE (XEXP (x, 1)) == AND)
14966 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14967 else
14968 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14971 else
14973 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14974 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14975 else
14976 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14978 return false;
14980 case MULT:
14981 if (FLOAT_MODE_P (mode))
14983 *total = COSTS_N_INSNS (ix86_cost->fmul);
14984 return false;
14986 else
14988 rtx op0 = XEXP (x, 0);
14989 rtx op1 = XEXP (x, 1);
14990 int nbits;
14991 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14993 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14994 for (nbits = 0; value != 0; value &= value - 1)
14995 nbits++;
14997 else
14998 /* This is arbitrary. */
14999 nbits = 7;
15001 /* Compute costs correctly for widening multiplication. */
15002 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
15003 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15004 == GET_MODE_SIZE (mode))
15006 int is_mulwiden = 0;
15007 enum machine_mode inner_mode = GET_MODE (op0);
15009 if (GET_CODE (op0) == GET_CODE (op1))
15010 is_mulwiden = 1, op1 = XEXP (op1, 0);
15011 else if (GET_CODE (op1) == CONST_INT)
15013 if (GET_CODE (op0) == SIGN_EXTEND)
15014 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15015 == INTVAL (op1);
15016 else
15017 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15020 if (is_mulwiden)
15021 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15024 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15025 + nbits * ix86_cost->mult_bit)
15026 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15028 return true;
15031 case DIV:
15032 case UDIV:
15033 case MOD:
15034 case UMOD:
15035 if (FLOAT_MODE_P (mode))
15036 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15037 else
15038 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15039 return false;
15041 case PLUS:
15042 if (FLOAT_MODE_P (mode))
15043 *total = COSTS_N_INSNS (ix86_cost->fadd);
15044 else if (GET_MODE_CLASS (mode) == MODE_INT
15045 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15047 if (GET_CODE (XEXP (x, 0)) == PLUS
15048 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15049 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15050 && CONSTANT_P (XEXP (x, 1)))
15052 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15053 if (val == 2 || val == 4 || val == 8)
15055 *total = COSTS_N_INSNS (ix86_cost->lea);
15056 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15057 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15058 outer_code);
15059 *total += rtx_cost (XEXP (x, 1), outer_code);
15060 return true;
15063 else if (GET_CODE (XEXP (x, 0)) == MULT
15064 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15066 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15067 if (val == 2 || val == 4 || val == 8)
15069 *total = COSTS_N_INSNS (ix86_cost->lea);
15070 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15071 *total += rtx_cost (XEXP (x, 1), outer_code);
15072 return true;
15075 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15077 *total = COSTS_N_INSNS (ix86_cost->lea);
15078 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15079 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15080 *total += rtx_cost (XEXP (x, 1), outer_code);
15081 return true;
15084 /* FALLTHRU */
15086 case MINUS:
15087 if (FLOAT_MODE_P (mode))
15089 *total = COSTS_N_INSNS (ix86_cost->fadd);
15090 return false;
15092 /* FALLTHRU */
15094 case AND:
15095 case IOR:
15096 case XOR:
15097 if (!TARGET_64BIT && mode == DImode)
15099 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15100 + (rtx_cost (XEXP (x, 0), outer_code)
15101 << (GET_MODE (XEXP (x, 0)) != DImode))
15102 + (rtx_cost (XEXP (x, 1), outer_code)
15103 << (GET_MODE (XEXP (x, 1)) != DImode)));
15104 return true;
15106 /* FALLTHRU */
15108 case NEG:
15109 if (FLOAT_MODE_P (mode))
15111 *total = COSTS_N_INSNS (ix86_cost->fchs);
15112 return false;
15114 /* FALLTHRU */
15116 case NOT:
15117 if (!TARGET_64BIT && mode == DImode)
15118 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15119 else
15120 *total = COSTS_N_INSNS (ix86_cost->add);
15121 return false;
15123 case FLOAT_EXTEND:
15124 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15125 *total = 0;
15126 return false;
15128 case ABS:
15129 if (FLOAT_MODE_P (mode))
15130 *total = COSTS_N_INSNS (ix86_cost->fabs);
15131 return false;
15133 case SQRT:
15134 if (FLOAT_MODE_P (mode))
15135 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15136 return false;
15138 case UNSPEC:
15139 if (XINT (x, 1) == UNSPEC_TP)
15140 *total = 0;
15141 return false;
15143 default:
15144 return false;
15148 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15149 static void
15150 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15152 init_section ();
15153 fputs ("\tpushl $", asm_out_file);
15154 assemble_name (asm_out_file, XSTR (symbol, 0));
15155 fputc ('\n', asm_out_file);
15157 #endif
15159 #if TARGET_MACHO
15161 static int current_machopic_label_num;
15163 /* Given a symbol name and its associated stub, write out the
15164 definition of the stub. */
15166 void
15167 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15169 unsigned int length;
15170 char *binder_name, *symbol_name, lazy_ptr_name[32];
15171 int label = ++current_machopic_label_num;
15173 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15174 symb = (*targetm.strip_name_encoding) (symb);
15176 length = strlen (stub);
15177 binder_name = alloca (length + 32);
15178 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15180 length = strlen (symb);
15181 symbol_name = alloca (length + 32);
15182 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15184 sprintf (lazy_ptr_name, "L%d$lz", label);
15186 if (MACHOPIC_PURE)
15187 machopic_picsymbol_stub_section ();
15188 else
15189 machopic_symbol_stub_section ();
15191 fprintf (file, "%s:\n", stub);
15192 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15194 if (MACHOPIC_PURE)
15196 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15197 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15198 fprintf (file, "\tjmp %%edx\n");
15200 else
15201 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15203 fprintf (file, "%s:\n", binder_name);
15205 if (MACHOPIC_PURE)
15207 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15208 fprintf (file, "\tpushl %%eax\n");
15210 else
15211 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15213 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15215 machopic_lazy_symbol_ptr_section ();
15216 fprintf (file, "%s:\n", lazy_ptr_name);
15217 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15218 fprintf (file, "\t.long %s\n", binder_name);
15220 #endif /* TARGET_MACHO */
15222 /* Order the registers for register allocator. */
15224 void
15225 x86_order_regs_for_local_alloc (void)
15227 int pos = 0;
15228 int i;
15230 /* First allocate the local general purpose registers. */
15231 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15232 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15233 reg_alloc_order [pos++] = i;
15235 /* Global general purpose registers. */
15236 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15237 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15238 reg_alloc_order [pos++] = i;
15240 /* x87 registers come first in case we are doing FP math
15241 using them. */
15242 if (!TARGET_SSE_MATH)
15243 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15244 reg_alloc_order [pos++] = i;
15246 /* SSE registers. */
15247 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15248 reg_alloc_order [pos++] = i;
15249 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15250 reg_alloc_order [pos++] = i;
15252 /* x87 registers. */
15253 if (TARGET_SSE_MATH)
15254 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15255 reg_alloc_order [pos++] = i;
15257 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15258 reg_alloc_order [pos++] = i;
15260 /* Initialize the rest of array as we do not allocate some registers
15261 at all. */
15262 while (pos < FIRST_PSEUDO_REGISTER)
15263 reg_alloc_order [pos++] = 0;
15266 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15267 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15268 #endif
15270 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15271 struct attribute_spec.handler. */
15272 static tree
15273 ix86_handle_struct_attribute (tree *node, tree name,
15274 tree args ATTRIBUTE_UNUSED,
15275 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15277 tree *type = NULL;
15278 if (DECL_P (*node))
15280 if (TREE_CODE (*node) == TYPE_DECL)
15281 type = &TREE_TYPE (*node);
15283 else
15284 type = node;
15286 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15287 || TREE_CODE (*type) == UNION_TYPE)))
15289 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15290 *no_add_attrs = true;
15293 else if ((is_attribute_p ("ms_struct", name)
15294 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15295 || ((is_attribute_p ("gcc_struct", name)
15296 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15298 warning ("`%s' incompatible attribute ignored",
15299 IDENTIFIER_POINTER (name));
15300 *no_add_attrs = true;
15303 return NULL_TREE;
15306 static bool
15307 ix86_ms_bitfield_layout_p (tree record_type)
15309 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15310 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15311 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15314 /* Returns an expression indicating where the this parameter is
15315 located on entry to the FUNCTION. */
15317 static rtx
15318 x86_this_parameter (tree function)
15320 tree type = TREE_TYPE (function);
15322 if (TARGET_64BIT)
15324 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15325 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15328 if (ix86_function_regparm (type, function) > 0)
15330 tree parm;
15332 parm = TYPE_ARG_TYPES (type);
15333 /* Figure out whether or not the function has a variable number of
15334 arguments. */
15335 for (; parm; parm = TREE_CHAIN (parm))
15336 if (TREE_VALUE (parm) == void_type_node)
15337 break;
15338 /* If not, the this parameter is in the first argument. */
15339 if (parm)
15341 int regno = 0;
15342 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15343 regno = 2;
15344 return gen_rtx_REG (SImode, regno);
15348 if (aggregate_value_p (TREE_TYPE (type), type))
15349 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15350 else
15351 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15354 /* Determine whether x86_output_mi_thunk can succeed. */
15356 static bool
15357 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15358 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15359 HOST_WIDE_INT vcall_offset, tree function)
15361 /* 64-bit can handle anything. */
15362 if (TARGET_64BIT)
15363 return true;
15365 /* For 32-bit, everything's fine if we have one free register. */
15366 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15367 return true;
15369 /* Need a free register for vcall_offset. */
15370 if (vcall_offset)
15371 return false;
15373 /* Need a free register for GOT references. */
15374 if (flag_pic && !(*targetm.binds_local_p) (function))
15375 return false;
15377 /* Otherwise ok. */
15378 return true;
15381 /* Output the assembler code for a thunk function. THUNK_DECL is the
15382 declaration for the thunk function itself, FUNCTION is the decl for
15383 the target function. DELTA is an immediate constant offset to be
15384 added to THIS. If VCALL_OFFSET is nonzero, the word at
15385 *(*this + vcall_offset) should be added to THIS. */
15387 static void
15388 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15389 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15390 HOST_WIDE_INT vcall_offset, tree function)
15392 rtx xops[3];
15393 rtx this = x86_this_parameter (function);
15394 rtx this_reg, tmp;
15396 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15397 pull it in now and let DELTA benefit. */
15398 if (REG_P (this))
15399 this_reg = this;
15400 else if (vcall_offset)
15402 /* Put the this parameter into %eax. */
15403 xops[0] = this;
15404 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15405 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15407 else
15408 this_reg = NULL_RTX;
15410 /* Adjust the this parameter by a fixed constant. */
15411 if (delta)
15413 xops[0] = GEN_INT (delta);
15414 xops[1] = this_reg ? this_reg : this;
15415 if (TARGET_64BIT)
15417 if (!x86_64_general_operand (xops[0], DImode))
15419 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15420 xops[1] = tmp;
15421 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15422 xops[0] = tmp;
15423 xops[1] = this;
15425 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15427 else
15428 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15431 /* Adjust the this parameter by a value stored in the vtable. */
15432 if (vcall_offset)
15434 if (TARGET_64BIT)
15435 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15436 else
15438 int tmp_regno = 2 /* ECX */;
15439 if (lookup_attribute ("fastcall",
15440 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15441 tmp_regno = 0 /* EAX */;
15442 tmp = gen_rtx_REG (SImode, tmp_regno);
15445 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15446 xops[1] = tmp;
15447 if (TARGET_64BIT)
15448 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15449 else
15450 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15452 /* Adjust the this parameter. */
15453 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15454 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15456 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15457 xops[0] = GEN_INT (vcall_offset);
15458 xops[1] = tmp2;
15459 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15460 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15462 xops[1] = this_reg;
15463 if (TARGET_64BIT)
15464 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15465 else
15466 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15469 /* If necessary, drop THIS back to its stack slot. */
15470 if (this_reg && this_reg != this)
15472 xops[0] = this_reg;
15473 xops[1] = this;
15474 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15477 xops[0] = XEXP (DECL_RTL (function), 0);
15478 if (TARGET_64BIT)
15480 if (!flag_pic || (*targetm.binds_local_p) (function))
15481 output_asm_insn ("jmp\t%P0", xops);
15482 else
15484 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15485 tmp = gen_rtx_CONST (Pmode, tmp);
15486 tmp = gen_rtx_MEM (QImode, tmp);
15487 xops[0] = tmp;
15488 output_asm_insn ("jmp\t%A0", xops);
15491 else
15493 if (!flag_pic || (*targetm.binds_local_p) (function))
15494 output_asm_insn ("jmp\t%P0", xops);
15495 else
15496 #if TARGET_MACHO
15497 if (TARGET_MACHO)
15499 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15500 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15501 tmp = gen_rtx_MEM (QImode, tmp);
15502 xops[0] = tmp;
15503 output_asm_insn ("jmp\t%0", xops);
15505 else
15506 #endif /* TARGET_MACHO */
15508 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15509 output_set_got (tmp);
15511 xops[1] = tmp;
15512 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15513 output_asm_insn ("jmp\t{*}%1", xops);
15518 static void
15519 x86_file_start (void)
15521 default_file_start ();
15522 if (X86_FILE_START_VERSION_DIRECTIVE)
15523 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15524 if (X86_FILE_START_FLTUSED)
15525 fputs ("\t.global\t__fltused\n", asm_out_file);
15526 if (ix86_asm_dialect == ASM_INTEL)
15527 fputs ("\t.intel_syntax\n", asm_out_file);
15531 x86_field_alignment (tree field, int computed)
15533 enum machine_mode mode;
15534 tree type = TREE_TYPE (field);
15536 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15537 return computed;
15538 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15539 ? get_inner_array_type (type) : type);
15540 if (mode == DFmode || mode == DCmode
15541 || GET_MODE_CLASS (mode) == MODE_INT
15542 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15543 return MIN (32, computed);
15544 return computed;
15547 /* Output assembler code to FILE to increment profiler label # LABELNO
15548 for profiling a function entry. */
15549 void
15550 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15552 if (TARGET_64BIT)
15553 if (flag_pic)
15555 #ifndef NO_PROFILE_COUNTERS
15556 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15557 #endif
15558 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15560 else
15562 #ifndef NO_PROFILE_COUNTERS
15563 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15564 #endif
15565 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15567 else if (flag_pic)
15569 #ifndef NO_PROFILE_COUNTERS
15570 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15571 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15572 #endif
15573 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15575 else
15577 #ifndef NO_PROFILE_COUNTERS
15578 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15579 PROFILE_COUNT_REGISTER);
15580 #endif
15581 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15585 /* We don't have exact information about the insn sizes, but we may assume
15586 quite safely that we are informed about all 1 byte insns and memory
15587 address sizes. This is enough to eliminate unnecessary padding in
15588 99% of cases. */
15590 static int
15591 min_insn_size (rtx insn)
15593 int l = 0;
15595 if (!INSN_P (insn) || !active_insn_p (insn))
15596 return 0;
15598 /* Discard alignments we've emit and jump instructions. */
15599 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15600 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15601 return 0;
15602 if (GET_CODE (insn) == JUMP_INSN
15603 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15604 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15605 return 0;
15607 /* Important case - calls are always 5 bytes.
15608 It is common to have many calls in the row. */
15609 if (GET_CODE (insn) == CALL_INSN
15610 && symbolic_reference_mentioned_p (PATTERN (insn))
15611 && !SIBLING_CALL_P (insn))
15612 return 5;
15613 if (get_attr_length (insn) <= 1)
15614 return 1;
15616 /* For normal instructions we may rely on the sizes of addresses
15617 and the presence of symbol to require 4 bytes of encoding.
15618 This is not the case for jumps where references are PC relative. */
15619 if (GET_CODE (insn) != JUMP_INSN)
15621 l = get_attr_length_address (insn);
15622 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15623 l = 4;
15625 if (l)
15626 return 1+l;
15627 else
15628 return 2;
15631 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15632 window. */
15634 static void
15635 ix86_avoid_jump_misspredicts (void)
15637 rtx insn, start = get_insns ();
15638 int nbytes = 0, njumps = 0;
15639 int isjump = 0;
15641 /* Look for all minimal intervals of instructions containing 4 jumps.
15642 The intervals are bounded by START and INSN. NBYTES is the total
15643 size of instructions in the interval including INSN and not including
15644 START. When the NBYTES is smaller than 16 bytes, it is possible
15645 that the end of START and INSN ends up in the same 16byte page.
15647 The smallest offset in the page INSN can start is the case where START
15648 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15649 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15651 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15654 nbytes += min_insn_size (insn);
15655 if (dump_file)
15656 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15657 INSN_UID (insn), min_insn_size (insn));
15658 if ((GET_CODE (insn) == JUMP_INSN
15659 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15660 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15661 || GET_CODE (insn) == CALL_INSN)
15662 njumps++;
15663 else
15664 continue;
15666 while (njumps > 3)
15668 start = NEXT_INSN (start);
15669 if ((GET_CODE (start) == JUMP_INSN
15670 && GET_CODE (PATTERN (start)) != ADDR_VEC
15671 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15672 || GET_CODE (start) == CALL_INSN)
15673 njumps--, isjump = 1;
15674 else
15675 isjump = 0;
15676 nbytes -= min_insn_size (start);
15678 if (njumps < 0)
15679 abort ();
15680 if (dump_file)
15681 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15682 INSN_UID (start), INSN_UID (insn), nbytes);
15684 if (njumps == 3 && isjump && nbytes < 16)
15686 int padsize = 15 - nbytes + min_insn_size (insn);
15688 if (dump_file)
15689 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15690 INSN_UID (insn), padsize);
15691 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15696 /* AMD Athlon works faster
15697 when RET is not destination of conditional jump or directly preceded
15698 by other jump instruction. We avoid the penalty by inserting NOP just
15699 before the RET instructions in such cases. */
15700 static void
15701 ix86_pad_returns (void)
15703 edge e;
15705 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15707 basic_block bb = e->src;
15708 rtx ret = BB_END (bb);
15709 rtx prev;
15710 bool replace = false;
15712 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15713 || !maybe_hot_bb_p (bb))
15714 continue;
15715 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15716 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15717 break;
15718 if (prev && GET_CODE (prev) == CODE_LABEL)
15720 edge e;
15721 for (e = bb->pred; e; e = e->pred_next)
15722 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15723 && !(e->flags & EDGE_FALLTHRU))
15724 replace = true;
15726 if (!replace)
15728 prev = prev_active_insn (ret);
15729 if (prev
15730 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15731 || GET_CODE (prev) == CALL_INSN))
15732 replace = true;
15733 /* Empty functions get branch mispredict even when the jump destination
15734 is not visible to us. */
15735 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15736 replace = true;
15738 if (replace)
15740 emit_insn_before (gen_return_internal_long (), ret);
15741 delete_insn (ret);
15746 /* Implement machine specific optimizations. We implement padding of returns
15747 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15748 static void
15749 ix86_reorg (void)
15751 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15752 ix86_pad_returns ();
15753 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15754 ix86_avoid_jump_misspredicts ();
15757 /* Return nonzero when QImode register that must be represented via REX prefix
15758 is used. */
15759 bool
15760 x86_extended_QIreg_mentioned_p (rtx insn)
15762 int i;
15763 extract_insn_cached (insn);
15764 for (i = 0; i < recog_data.n_operands; i++)
15765 if (REG_P (recog_data.operand[i])
15766 && REGNO (recog_data.operand[i]) >= 4)
15767 return true;
15768 return false;
15771 /* Return nonzero when P points to register encoded via REX prefix.
15772 Called via for_each_rtx. */
15773 static int
15774 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15776 unsigned int regno;
15777 if (!REG_P (*p))
15778 return 0;
15779 regno = REGNO (*p);
15780 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15783 /* Return true when INSN mentions register that must be encoded using REX
15784 prefix. */
15785 bool
15786 x86_extended_reg_mentioned_p (rtx insn)
15788 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15791 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15792 optabs would emit if we didn't have TFmode patterns. */
15794 void
15795 x86_emit_floatuns (rtx operands[2])
15797 rtx neglab, donelab, i0, i1, f0, in, out;
15798 enum machine_mode mode, inmode;
15800 inmode = GET_MODE (operands[1]);
15801 if (inmode != SImode
15802 && inmode != DImode)
15803 abort ();
15805 out = operands[0];
15806 in = force_reg (inmode, operands[1]);
15807 mode = GET_MODE (out);
15808 neglab = gen_label_rtx ();
15809 donelab = gen_label_rtx ();
15810 i1 = gen_reg_rtx (Pmode);
15811 f0 = gen_reg_rtx (mode);
15813 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15815 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15816 emit_jump_insn (gen_jump (donelab));
15817 emit_barrier ();
15819 emit_label (neglab);
15821 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15822 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15823 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15824 expand_float (f0, i0, 0);
15825 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15827 emit_label (donelab);
15830 /* Return if we do not know how to pass TYPE solely in registers. */
15831 bool
15832 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15834 if (default_must_pass_in_stack (mode, type))
15835 return true;
15836 return (!TARGET_64BIT && type && mode == TImode);
15839 /* Initialize vector TARGET via VALS. */
15840 void
15841 ix86_expand_vector_init (rtx target, rtx vals)
15843 enum machine_mode mode = GET_MODE (target);
15844 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15845 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15846 int i;
15848 for (i = n_elts - 1; i >= 0; i--)
15849 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15850 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15851 break;
15853 /* Few special cases first...
15854 ... constants are best loaded from constant pool. */
15855 if (i < 0)
15857 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15858 return;
15861 /* ... values where only first field is non-constant are best loaded
15862 from the pool and overwritten via move later. */
15863 if (!i)
15865 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15866 GET_MODE_INNER (mode), 0);
15868 op = force_reg (mode, op);
15869 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15870 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15871 switch (GET_MODE (target))
15873 case V2DFmode:
15874 emit_insn (gen_sse2_movsd (target, target, op));
15875 break;
15876 case V4SFmode:
15877 emit_insn (gen_sse_movss (target, target, op));
15878 break;
15879 default:
15880 break;
15882 return;
15885 /* And the busy sequence doing rotations. */
15886 switch (GET_MODE (target))
15888 case V2DFmode:
15890 rtx vecop0 =
15891 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15892 rtx vecop1 =
15893 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15895 vecop0 = force_reg (V2DFmode, vecop0);
15896 vecop1 = force_reg (V2DFmode, vecop1);
15897 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15899 break;
15900 case V4SFmode:
15902 rtx vecop0 =
15903 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15904 rtx vecop1 =
15905 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15906 rtx vecop2 =
15907 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15908 rtx vecop3 =
15909 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15910 rtx tmp1 = gen_reg_rtx (V4SFmode);
15911 rtx tmp2 = gen_reg_rtx (V4SFmode);
15913 vecop0 = force_reg (V4SFmode, vecop0);
15914 vecop1 = force_reg (V4SFmode, vecop1);
15915 vecop2 = force_reg (V4SFmode, vecop2);
15916 vecop3 = force_reg (V4SFmode, vecop3);
15917 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15918 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15919 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15921 break;
15922 default:
15923 abort ();
15927 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15929 We do this in the new i386 backend to maintain source compatibility
15930 with the old cc0-based compiler. */
15932 static tree
15933 ix86_md_asm_clobbers (tree clobbers)
15935 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15936 clobbers);
15937 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15938 clobbers);
15939 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15940 clobbers);
15941 return clobbers;
15944 /* Worker function for REVERSE_CONDITION. */
15946 enum rtx_code
15947 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15949 return (mode != CCFPmode && mode != CCFPUmode
15950 ? reverse_condition (code)
15951 : reverse_condition_maybe_unordered (code));
15954 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15955 to OPERANDS[0]. */
15957 const char *
15958 output_387_reg_move (rtx insn, rtx *operands)
15960 if (REG_P (operands[1])
15961 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15963 if (REGNO (operands[0]) == FIRST_STACK_REG
15964 && TARGET_USE_FFREEP)
15965 return "ffreep\t%y0";
15966 return "fstp\t%y0";
15968 if (STACK_TOP_P (operands[0]))
15969 return "fld%z1\t%y1";
15970 return "fst\t%y0";
15973 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15974 FP status register is set. */
15976 void
15977 ix86_emit_fp_unordered_jump (rtx label)
15979 rtx reg = gen_reg_rtx (HImode);
15980 rtx temp;
15982 emit_insn (gen_x86_fnstsw_1 (reg));
15984 if (TARGET_USE_SAHF)
15986 emit_insn (gen_x86_sahf_1 (reg));
15988 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15989 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15991 else
15993 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15995 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15996 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15999 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16000 gen_rtx_LABEL_REF (VOIDmode, label),
16001 pc_rtx);
16002 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16003 emit_jump_insn (temp);
16006 /* Output code to perform a log1p XFmode calculation. */
16008 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16010 rtx label1 = gen_label_rtx ();
16011 rtx label2 = gen_label_rtx ();
16013 rtx tmp = gen_reg_rtx (XFmode);
16014 rtx tmp2 = gen_reg_rtx (XFmode);
16016 emit_insn (gen_absxf2 (tmp, op1));
16017 emit_insn (gen_cmpxf (tmp,
16018 CONST_DOUBLE_FROM_REAL_VALUE (
16019 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16020 XFmode)));
16021 emit_jump_insn (gen_bge (label1));
16023 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16024 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16025 emit_jump (label2);
16027 emit_label (label1);
16028 emit_move_insn (tmp, CONST1_RTX (XFmode));
16029 emit_insn (gen_addxf3 (tmp, op1, tmp));
16030 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16031 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16033 emit_label (label2);
16036 #include "gt-i386.h"