backport i386.c regparm fix.
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
397 const struct processor_costs *ix86_cost = &pentium_cost;
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
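/* Illustrative sketch of how these masks are consumed (modeled on the tests
   later in this file, e.g. the x86_arch_always_fancy_math_387 and
   x86_3dnow_a checks in override_options): a feature applies to the
   selected CPU when its bit is set:

     if (x86_use_leave & (1 << ix86_cpu))
       ... prefer "leave" in the epilogue ...

   i386.h conventionally wraps such tests in TARGET_* macros; the specific
   macro names are not shown in this file.  */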
449 /* In case the average insn count for a single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452 #define FAST_PROLOGUE_INSN_COUNT 30
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
458 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
508 static int const x86_64_int_return_registers[4] =
509 {
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
588 };
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
596 /* The encoding characters for the four TLS models present in ELF. */
598 static char const tls_model_chars[] = " GLil";
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
619 [arguments]
620 <- ARG_POINTER
621 saved pc
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650 };
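/* Illustrative example (hypothetical numbers, not computed here): in 32-bit
   code with frame_pointer_needed, the 4-byte saved pc plus the 4-byte saved
   frame pointer put HARD_FRAME_POINTER 8 bytes below ARG_POINTER
   (hard_frame_pointer_offset == 8); everything the diagram groups under
   to_allocate (padding1, va_arg registers, frame, padding2) is what the
   prologue subtracts from the stack pointer.  */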
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
656 /* Parsed value. */
657 enum cmodel ix86_cmodel;
658 /* Asm dialect. */
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
661 /* TLS dialect. */
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
681 /* True if the SSE prefetch instruction is not a NOOP. */
682 int x86_prefetch_sse;
684 /* ix86_regparm_string as a number */
685 int ix86_regparm;
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
716 int, int, FILE *));
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx x86_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
759 struct ix86_address
761 rtx base, index, disp;
762 HOST_WIDE_INT scale;
765 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
766 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
768 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
769 static const char *ix86_strip_name_encoding PARAMS ((const char *))
770 ATTRIBUTE_UNUSED;
772 struct builtin_description;
773 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
774 tree, rtx));
775 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
776 tree, rtx));
777 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
778 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
779 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
780 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
781 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
782 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
783 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
784 enum rtx_code *,
785 enum rtx_code *,
786 enum rtx_code *));
787 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
788 rtx *, rtx *));
789 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
791 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
792 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
793 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
794 static int ix86_save_reg PARAMS ((unsigned int, int));
795 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
796 static int ix86_comp_type_attributes PARAMS ((tree, tree));
797 static int ix86_fntype_regparm PARAMS ((tree));
798 const struct attribute_spec ix86_attribute_table[];
799 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
800 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
801 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
802 static int ix86_value_regno PARAMS ((enum machine_mode));
803 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
805 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
806 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
807 #endif
809 /* Register class used for passing a given 64-bit part of the argument.
810 These represent classes as documented by the psABI, with the exception
811 of the SSESF and SSEDF classes, which are basically the SSE class, except
812 that gcc will use SF or DFmode moves instead of DImode to avoid reformatting penalties.
814 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
815 whenever possible (the upper half does contain padding).
816 */
817 enum x86_64_reg_class
818 {
819 X86_64_NO_CLASS,
820 X86_64_INTEGER_CLASS,
821 X86_64_INTEGERSI_CLASS,
822 X86_64_SSE_CLASS,
823 X86_64_SSESF_CLASS,
824 X86_64_SSEDF_CLASS,
825 X86_64_SSEUP_CLASS,
826 X86_64_X87_CLASS,
827 X86_64_X87UP_CLASS,
828 X86_64_MEMORY_CLASS
829 };
830 static const char * const x86_64_reg_class_name[] =
831 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
833 #define MAX_CLASSES 4
834 static int classify_argument PARAMS ((enum machine_mode, tree,
835 enum x86_64_reg_class [MAX_CLASSES],
836 int));
837 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
838 int *));
839 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
840 const int *, int));
841 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
842 enum x86_64_reg_class));
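/* Worked classification example (derived from the x86-64 psABI, offered as
   an illustration rather than taken from this file): a 16-byte
   struct { double d; long l; } spans two eightbytes; classify_argument
   should yield X86_64_SSEDF_CLASS for the first (an SSE register, moved in
   DFmode) and X86_64_INTEGER_CLASS for the second (a general register), so
   construct_container places the value in one SSE and one integer
   register.  */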
844 /* Initialize the GCC target structure. */
845 #undef TARGET_ATTRIBUTE_TABLE
846 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
847 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
848 # undef TARGET_MERGE_DECL_ATTRIBUTES
849 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
850 #endif
852 #undef TARGET_COMP_TYPE_ATTRIBUTES
853 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
855 #undef TARGET_INIT_BUILTINS
856 #define TARGET_INIT_BUILTINS ix86_init_builtins
858 #undef TARGET_EXPAND_BUILTIN
859 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
861 #undef TARGET_ASM_FUNCTION_EPILOGUE
862 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
864 #undef TARGET_ASM_OPEN_PAREN
865 #define TARGET_ASM_OPEN_PAREN ""
866 #undef TARGET_ASM_CLOSE_PAREN
867 #define TARGET_ASM_CLOSE_PAREN ""
869 #undef TARGET_ASM_ALIGNED_HI_OP
870 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
871 #undef TARGET_ASM_ALIGNED_SI_OP
872 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
873 #ifdef ASM_QUAD
874 #undef TARGET_ASM_ALIGNED_DI_OP
875 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
876 #endif
878 #undef TARGET_ASM_UNALIGNED_HI_OP
879 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
880 #undef TARGET_ASM_UNALIGNED_SI_OP
881 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
882 #undef TARGET_ASM_UNALIGNED_DI_OP
883 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
885 #undef TARGET_SCHED_ADJUST_COST
886 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
887 #undef TARGET_SCHED_ISSUE_RATE
888 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
889 #undef TARGET_SCHED_VARIABLE_ISSUE
890 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
891 #undef TARGET_SCHED_INIT
892 #define TARGET_SCHED_INIT ix86_sched_init
893 #undef TARGET_SCHED_REORDER
894 #define TARGET_SCHED_REORDER ix86_sched_reorder
895 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
896 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
897 ia32_use_dfa_pipeline_interface
898 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
899 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
900 ia32_multipass_dfa_lookahead
902 #ifdef HAVE_AS_TLS
903 #undef TARGET_HAVE_TLS
904 #define TARGET_HAVE_TLS true
905 #endif
906 #undef TARGET_CANNOT_FORCE_CONST_MEM
907 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
909 #undef TARGET_MS_BITFIELD_LAYOUT_P
910 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
912 #undef TARGET_ASM_OUTPUT_MI_THUNK
913 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
914 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
915 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
917 struct gcc_target targetm = TARGET_INITIALIZER;
919 /* The svr4 ABI for the i386 says that records and unions are returned
920 in memory. */
921 #ifndef DEFAULT_PCC_STRUCT_RETURN
922 #define DEFAULT_PCC_STRUCT_RETURN 1
923 #endif
925 /* Sometimes certain combinations of command options do not make
926 sense on a particular target machine. You can define a macro
927 `OVERRIDE_OPTIONS' to take account of this. This macro, if
928 defined, is executed once just after all the command options have
929 been parsed.
931 Don't use this macro to turn on various extra optimizations for
932 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
934 void
935 override_options ()
936 {
937 int i;
938 /* Comes from final.c -- no real reason to change it. */
939 #define MAX_CODE_ALIGN 16
941 static struct ptt
943 const struct processor_costs *cost; /* Processor costs */
944 const int target_enable; /* Target flags to enable. */
945 const int target_disable; /* Target flags to disable. */
946 const int align_loop; /* Default alignments. */
947 const int align_loop_max_skip;
948 const int align_jump;
949 const int align_jump_max_skip;
950 const int align_func;
951 const int branch_cost;
952 }
953 const processor_target_table[PROCESSOR_max] =
954 {
955 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
956 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
957 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
958 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
959 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
960 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
961 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
962 };
964 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
965 static struct pta
966 {
967 const char *const name; /* processor name or nickname. */
968 const enum processor_type processor;
969 const enum pta_flags
970 {
971 PTA_SSE = 1,
972 PTA_SSE2 = 2,
973 PTA_MMX = 4,
974 PTA_PREFETCH_SSE = 8,
975 PTA_3DNOW = 16,
976 PTA_3DNOW_A = 64
977 } flags;
978 }
979 const processor_alias_table[] =
980 {
981 {"i386", PROCESSOR_I386, 0},
982 {"i486", PROCESSOR_I486, 0},
983 {"i586", PROCESSOR_PENTIUM, 0},
984 {"pentium", PROCESSOR_PENTIUM, 0},
985 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
986 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
987 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
988 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
989 {"i686", PROCESSOR_PENTIUMPRO, 0},
990 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
991 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
992 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
993 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
994 PTA_MMX | PTA_PREFETCH_SSE},
995 {"k6", PROCESSOR_K6, PTA_MMX},
996 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
997 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
998 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
999 | PTA_3DNOW_A},
1000 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1001 | PTA_3DNOW | PTA_3DNOW_A},
1002 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1003 | PTA_3DNOW_A | PTA_SSE},
1004 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1005 | PTA_3DNOW_A | PTA_SSE},
1006 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1007 | PTA_3DNOW_A | PTA_SSE},
1008 };
1010 int const pta_size = ARRAY_SIZE (processor_alias_table);
1012 /* By default our XFmode is the 80-bit extended format. If we use
1013 TFmode instead, it's also the 80-bit format, but with padding. */
1014 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1015 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1017 /* Set the default values for switches whose default depends on TARGET_64BIT
1018 in case they weren't overwritten by command line options. */
1019 if (TARGET_64BIT)
1020 {
1021 if (flag_omit_frame_pointer == 2)
1022 flag_omit_frame_pointer = 1;
1023 if (flag_asynchronous_unwind_tables == 2)
1024 flag_asynchronous_unwind_tables = 1;
1025 if (flag_pcc_struct_return == 2)
1026 flag_pcc_struct_return = 0;
1027 }
1028 else
1029 {
1030 if (flag_omit_frame_pointer == 2)
1031 flag_omit_frame_pointer = 0;
1032 if (flag_asynchronous_unwind_tables == 2)
1033 flag_asynchronous_unwind_tables = 0;
1034 if (flag_pcc_struct_return == 2)
1035 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1036 }
1038 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1039 SUBTARGET_OVERRIDE_OPTIONS;
1040 #endif
1042 if (!ix86_cpu_string && ix86_arch_string)
1043 ix86_cpu_string = ix86_arch_string;
1044 if (!ix86_cpu_string)
1045 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1046 if (!ix86_arch_string)
1047 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1049 if (ix86_cmodel_string != 0)
1050 {
1051 if (!strcmp (ix86_cmodel_string, "small"))
1052 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1053 else if (flag_pic)
1054 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1055 else if (!strcmp (ix86_cmodel_string, "32"))
1056 ix86_cmodel = CM_32;
1057 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1058 ix86_cmodel = CM_KERNEL;
1059 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1060 ix86_cmodel = CM_MEDIUM;
1061 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1062 ix86_cmodel = CM_LARGE;
1063 else
1064 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1065 }
1066 else
1067 {
1068 ix86_cmodel = CM_32;
1069 if (TARGET_64BIT)
1070 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1071 }
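/* Example invocations (illustrative): a plain 32-bit compile lands in the
   else arm above and gets CM_32; "-m64" defaults to CM_SMALL (CM_SMALL_PIC
   under -fpic); "-m64 -mcmodel=kernel" selects CM_KERNEL.  */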
1072 if (ix86_asm_string != 0)
1073 {
1074 if (!strcmp (ix86_asm_string, "intel"))
1075 ix86_asm_dialect = ASM_INTEL;
1076 else if (!strcmp (ix86_asm_string, "att"))
1077 ix86_asm_dialect = ASM_ATT;
1078 else
1079 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1080 }
1081 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1082 error ("code model `%s' not supported in the %s bit mode",
1083 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1084 if (ix86_cmodel == CM_LARGE)
1085 sorry ("code model `large' not supported yet");
1086 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1087 sorry ("%i-bit mode not compiled in",
1088 (target_flags & MASK_64BIT) ? 64 : 32);
1090 for (i = 0; i < pta_size; i++)
1091 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1092 {
1093 ix86_arch = processor_alias_table[i].processor;
1094 /* Default cpu tuning to the architecture. */
1095 ix86_cpu = ix86_arch;
1096 if (processor_alias_table[i].flags & PTA_MMX
1097 && !(target_flags_explicit & MASK_MMX))
1098 target_flags |= MASK_MMX;
1099 if (processor_alias_table[i].flags & PTA_3DNOW
1100 && !(target_flags_explicit & MASK_3DNOW))
1101 target_flags |= MASK_3DNOW;
1102 if (processor_alias_table[i].flags & PTA_3DNOW_A
1103 && !(target_flags_explicit & MASK_3DNOW_A))
1104 target_flags |= MASK_3DNOW_A;
1105 if (processor_alias_table[i].flags & PTA_SSE
1106 && !(target_flags_explicit & MASK_SSE))
1107 target_flags |= MASK_SSE;
1108 if (processor_alias_table[i].flags & PTA_SSE2
1109 && !(target_flags_explicit & MASK_SSE2))
1110 target_flags |= MASK_SSE2;
1111 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1112 x86_prefetch_sse = true;
1113 break;
1114 }
1116 if (i == pta_size)
1117 error ("bad value (%s) for -march= switch", ix86_arch_string);
1119 for (i = 0; i < pta_size; i++)
1120 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1121 {
1122 ix86_cpu = processor_alias_table[i].processor;
1123 break;
1124 }
1125 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1126 x86_prefetch_sse = true;
1127 if (i == pta_size)
1128 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1130 if (optimize_size)
1131 ix86_cost = &size_cost;
1132 else
1133 ix86_cost = processor_target_table[ix86_cpu].cost;
1134 target_flags |= processor_target_table[ix86_cpu].target_enable;
1135 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1137 /* Arrange to set up i386_stack_locals for all functions. */
1138 init_machine_status = ix86_init_machine_status;
1140 /* Validate -mregparm= value. */
1141 if (ix86_regparm_string)
1142 {
1143 i = atoi (ix86_regparm_string);
1144 if (i < 0 || i > REGPARM_MAX)
1145 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1146 else
1147 ix86_regparm = i;
1148 }
1149 else
1150 if (TARGET_64BIT)
1151 ix86_regparm = REGPARM_MAX;
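/* Example: "-mregparm=3" makes the first three integer arguments travel in
   registers (%eax, %edx, %ecx under the documented regparm convention).
   On 64-bit targets argument registers are always used, so ix86_regparm is
   simply forced to REGPARM_MAX above.  */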
1153 /* If the user has provided any of the -malign-* options,
1154 warn and use that value only if -falign-* is not set.
1155 Remove this code in GCC 3.2 or later. */
1156 if (ix86_align_loops_string)
1157 {
1158 warning ("-malign-loops is obsolete, use -falign-loops");
1159 if (align_loops == 0)
1160 {
1161 i = atoi (ix86_align_loops_string);
1162 if (i < 0 || i > MAX_CODE_ALIGN)
1163 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1164 else
1165 align_loops = 1 << i;
1166 }
1167 }
1169 if (ix86_align_jumps_string)
1170 {
1171 warning ("-malign-jumps is obsolete, use -falign-jumps");
1172 if (align_jumps == 0)
1173 {
1174 i = atoi (ix86_align_jumps_string);
1175 if (i < 0 || i > MAX_CODE_ALIGN)
1176 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1177 else
1178 align_jumps = 1 << i;
1179 }
1180 }
1182 if (ix86_align_funcs_string)
1183 {
1184 warning ("-malign-functions is obsolete, use -falign-functions");
1185 if (align_functions == 0)
1186 {
1187 i = atoi (ix86_align_funcs_string);
1188 if (i < 0 || i > MAX_CODE_ALIGN)
1189 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1190 else
1191 align_functions = 1 << i;
1192 }
1193 }
1195 /* Default align_* from the processor table. */
1196 if (align_loops == 0)
1197 {
1198 align_loops = processor_target_table[ix86_cpu].align_loop;
1199 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1200 }
1201 if (align_jumps == 0)
1202 {
1203 align_jumps = processor_target_table[ix86_cpu].align_jump;
1204 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1205 }
1206 if (align_functions == 0)
1207 {
1208 align_functions = processor_target_table[ix86_cpu].align_func;
1209 }
1211 /* Validate -mpreferred-stack-boundary= value, or provide default.
1212 The default of 128 bits is for Pentium III's SSE __m128, but we
1213 don't want additional code to keep the stack aligned when
1214 optimizing for code size. */
1215 ix86_preferred_stack_boundary = (optimize_size
1216 ? TARGET_64BIT ? 128 : 32
1217 : 128);
1218 if (ix86_preferred_stack_boundary_string)
1219 {
1220 i = atoi (ix86_preferred_stack_boundary_string);
1221 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1222 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1223 TARGET_64BIT ? 4 : 2);
1224 else
1225 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1226 }
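/* Example of the arithmetic above: "-mpreferred-stack-boundary=4" gives
   (1 << 4) * BITS_PER_UNIT == 128 bits, i.e. the 16-byte alignment that
   keeps SSE __m128 values naturally aligned.  */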
1228 /* Validate -mbranch-cost= value, or provide default. */
1229 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1230 if (ix86_branch_cost_string)
1231 {
1232 i = atoi (ix86_branch_cost_string);
1233 if (i < 0 || i > 5)
1234 error ("-mbranch-cost=%d is not between 0 and 5", i);
1235 else
1236 ix86_branch_cost = i;
1237 }
1239 if (ix86_tls_dialect_string)
1240 {
1241 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1242 ix86_tls_dialect = TLS_DIALECT_GNU;
1243 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1244 ix86_tls_dialect = TLS_DIALECT_SUN;
1245 else
1246 error ("bad value (%s) for -mtls-dialect= switch",
1247 ix86_tls_dialect_string);
1248 }
1250 /* Keep nonleaf frame pointers. */
1251 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1252 flag_omit_frame_pointer = 1;
1254 /* If we're doing fast math, we don't care about comparison order
1255 wrt NaNs. This lets us use a shorter comparison sequence. */
1256 if (flag_unsafe_math_optimizations)
1257 target_flags &= ~MASK_IEEE_FP;
1259 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1260 since the insns won't need emulation. */
1261 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1262 target_flags &= ~MASK_NO_FANCY_MATH_387;
1264 if (TARGET_64BIT)
1265 {
1266 if (TARGET_ALIGN_DOUBLE)
1267 error ("-malign-double makes no sense in the 64bit mode");
1268 if (TARGET_RTD)
1269 error ("-mrtd calling convention not supported in the 64bit mode");
1270 /* Enable by default the SSE and MMX builtins. */
1271 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1272 ix86_fpmath = FPMATH_SSE;
1273 }
1274 else
1275 ix86_fpmath = FPMATH_387;
1277 if (ix86_fpmath_string != 0)
1278 {
1279 if (! strcmp (ix86_fpmath_string, "387"))
1280 ix86_fpmath = FPMATH_387;
1281 else if (! strcmp (ix86_fpmath_string, "sse"))
1282 {
1283 if (!TARGET_SSE)
1284 {
1285 warning ("SSE instruction set disabled, using 387 arithmetics");
1286 ix86_fpmath = FPMATH_387;
1287 }
1288 else
1289 ix86_fpmath = FPMATH_SSE;
1290 }
1291 else if (! strcmp (ix86_fpmath_string, "387,sse")
1292 || ! strcmp (ix86_fpmath_string, "sse,387"))
1293 {
1294 if (!TARGET_SSE)
1295 {
1296 warning ("SSE instruction set disabled, using 387 arithmetics");
1297 ix86_fpmath = FPMATH_387;
1298 }
1299 else if (!TARGET_80387)
1300 {
1301 warning ("387 instruction set disabled, using SSE arithmetics");
1302 ix86_fpmath = FPMATH_SSE;
1303 }
1304 else
1305 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1306 }
1307 else
1308 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1309 }
1311 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1312 on by -msse. */
1313 if (TARGET_SSE)
1314 {
1315 target_flags |= MASK_MMX;
1316 x86_prefetch_sse = true;
1317 }
1319 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1320 if (TARGET_3DNOW)
1321 {
1322 target_flags |= MASK_MMX;
1323 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1324 extensions it adds. */
1325 if (x86_3dnow_a & (1 << ix86_arch))
1326 target_flags |= MASK_3DNOW_A;
1327 }
1328 if ((x86_accumulate_outgoing_args & CPUMASK)
1329 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1330 && !optimize_size)
1331 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1333 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1334 {
1335 char *p;
1336 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1337 p = strchr (internal_label_prefix, 'X');
1338 internal_label_prefix_len = p - internal_label_prefix;
1339 *p = '\0';
1340 }
1341 }
1343 void
1344 optimization_options (level, size)
1345 int level;
1346 int size ATTRIBUTE_UNUSED;
1347 {
1348 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1349 make the problem with not enough registers even worse. */
1350 #ifdef INSN_SCHEDULING
1351 if (level > 1)
1352 flag_schedule_insns = 0;
1353 #endif
1355 /* The default values of these switches depend on TARGET_64BIT,
1356 which is not known at this moment. Mark these values with 2 and
1357 let the user override them. In case there is no command line option
1358 specifying them, we will set the defaults in override_options. */
1359 if (optimize >= 1)
1360 flag_omit_frame_pointer = 2;
1361 flag_pcc_struct_return = 2;
1362 flag_asynchronous_unwind_tables = 2;
1363 }
1365 /* Table of valid machine attributes. */
1366 const struct attribute_spec ix86_attribute_table[] =
1367 {
1368 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1369 /* Stdcall attribute says callee is responsible for popping arguments
1370 if they are not variable. */
1371 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1372 /* Fastcall attribute says callee is responsible for popping arguments
1373 if they are not variable. */
1374 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1375 /* Cdecl attribute says the callee is a normal C declaration */
1376 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1377 /* Regparm attribute specifies how many integer arguments are to be
1378 passed in registers. */
1379 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1380 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1381 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1382 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1383 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1384 #endif
1385 { NULL, 0, 0, false, false, false, NULL }
1386 };
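/* Usage sketch (hypothetical user declarations, not part of GCC itself):

     int __attribute__((stdcall))    f (int a, int b);  callee pops args
     int __attribute__((fastcall))   g (int a, int b);  a in %ecx, b in %edx
     int __attribute__((regparm(2))) h (int a, int b);  a in %eax, b in %edx

   The handlers below validate these; regparm additionally checks that its
   integer argument does not exceed REGPARM_MAX.  */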
1388 /* Handle a "cdecl" or "stdcall" attribute;
1389 arguments as in struct attribute_spec.handler. */
1390 static tree
1391 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1392 tree *node;
1393 tree name;
1394 tree args ATTRIBUTE_UNUSED;
1395 int flags ATTRIBUTE_UNUSED;
1396 bool *no_add_attrs;
1397 {
1398 if (TREE_CODE (*node) != FUNCTION_TYPE
1399 && TREE_CODE (*node) != METHOD_TYPE
1400 && TREE_CODE (*node) != FIELD_DECL
1401 && TREE_CODE (*node) != TYPE_DECL)
1402 {
1403 warning ("`%s' attribute only applies to functions",
1404 IDENTIFIER_POINTER (name));
1405 *no_add_attrs = true;
1406 }
1408 if (TARGET_64BIT)
1409 {
1410 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1411 *no_add_attrs = true;
1412 }
1414 return NULL_TREE;
1415 }
1417 /* Handle a "regparm" attribute;
1418 arguments as in struct attribute_spec.handler. */
1419 static tree
1420 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1421 tree *node;
1422 tree name;
1423 tree args;
1424 int flags ATTRIBUTE_UNUSED;
1425 bool *no_add_attrs;
1426 {
1427 if (TREE_CODE (*node) != FUNCTION_TYPE
1428 && TREE_CODE (*node) != METHOD_TYPE
1429 && TREE_CODE (*node) != FIELD_DECL
1430 && TREE_CODE (*node) != TYPE_DECL)
1431 {
1432 warning ("`%s' attribute only applies to functions",
1433 IDENTIFIER_POINTER (name));
1434 *no_add_attrs = true;
1435 }
1436 else
1437 {
1438 tree cst;
1440 cst = TREE_VALUE (args);
1441 if (TREE_CODE (cst) != INTEGER_CST)
1442 {
1443 warning ("`%s' attribute requires an integer constant argument",
1444 IDENTIFIER_POINTER (name));
1445 *no_add_attrs = true;
1446 }
1447 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1448 {
1449 warning ("argument to `%s' attribute larger than %d",
1450 IDENTIFIER_POINTER (name), REGPARM_MAX);
1451 *no_add_attrs = true;
1452 }
1453 }
1455 return NULL_TREE;
1456 }
1458 /* Return 0 if the attributes for two types are incompatible, 1 if they
1459 are compatible, and 2 if they are nearly compatible (which causes a
1460 warning to be generated). */
1462 static int
1463 ix86_comp_type_attributes (type1, type2)
1464 tree type1;
1465 tree type2;
1466 {
1467 /* Check for mismatch of non-default calling convention. */
1468 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1470 if (TREE_CODE (type1) != FUNCTION_TYPE)
1471 return 1;
1473 /* Check for mismatched fastcall types */
1474 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1475 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1476 return 0;
1478 /* Check for mismatched return types (cdecl vs stdcall). */
1479 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1480 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1481 return 0;
1482 return 1;
1483 }
1485 /* Return the regparm value for a function with the indicated TYPE. */
1487 static int
1488 ix86_fntype_regparm (type)
1489 tree type;
1490 {
1491 tree attr;
1493 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1494 if (attr)
1495 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1496 else
1497 return ix86_regparm;
1498 }
1500 /* Return true if EAX is live at the start of the function. Used by
1501 ix86_expand_prologue to determine if we need special help before
1502 calling allocate_stack_worker. */
1504 static bool
1505 ix86_eax_live_at_start_p (void)
1506 {
1507 /* Cheat. Don't bother working forward from ix86_function_regparm
1508 to the function type to whether an actual argument is located in
1509 eax. Instead just look at cfg info, which is still close enough
1510 to correct at this point. This gives false positives for broken
1511 functions that might use uninitialized data that happens to be
1512 allocated in eax, but who cares? */
1513 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1514 }
1516 /* Value is the number of bytes of arguments automatically
1517 popped when returning from a subroutine call.
1518 FUNDECL is the declaration node of the function (as a tree),
1519 FUNTYPE is the data type of the function (as a tree),
1520 or for a library call it is an identifier node for the subroutine name.
1521 SIZE is the number of bytes of arguments passed on the stack.
1523 On the 80386, the RTD insn may be used to pop them if the number
1524 of args is fixed, but if the number is variable then the caller
1525 must pop them all. RTD can't be used for library calls now
1526 because the library is compiled with the Unix compiler.
1527 Use of RTD is a selectable option, since it is incompatible with
1528 standard Unix calling sequences. If the option is not selected,
1529 the caller must always pop the args.
1531 The attribute stdcall is equivalent to RTD on a per module basis. */
1533 int
1534 ix86_return_pops_args (fundecl, funtype, size)
1535 tree fundecl;
1536 tree funtype;
1537 int size;
1538 {
1539 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1541 /* Cdecl functions override -mrtd, and never pop the stack. */
1542 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1544 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1545 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1546 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1547 rtd = 1;
1549 if (rtd
1550 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1551 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1552 == void_type_node)))
1553 return size;
1556 /* Lose any fake structure return argument if it is passed on the stack. */
1557 if (aggregate_value_p (TREE_TYPE (funtype))
1558 && !TARGET_64BIT)
1560 int nregs = ix86_fntype_regparm (funtype);
1562 if (!nregs)
1563 return GET_MODE_SIZE (Pmode);
1566 return 0;
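/* Worked example (annotation, not part of the original source): for

     void __attribute__ ((stdcall)) g (int a, int b);

   the argument list is fixed and 8 bytes long, so this function
   returns 8 and the callee pops its own arguments with 'ret $8'.
   A stdcall function declared with '...' instead returns 0 and the
   caller pops.  'g' is a hypothetical name.  */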
1569 /* Argument support functions. */
1571 /* Return true when the register REGNO may be used to pass function parameters. */
1572 bool
1573 ix86_function_arg_regno_p (regno)
1574 int regno;
1576 int i;
1577 if (!TARGET_64BIT)
1578 return (regno < REGPARM_MAX
1579 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1580 if (SSE_REGNO_P (regno) && TARGET_SSE)
1581 return true;
1582 /* RAX is used as a hidden argument to va_arg functions. */
1583 if (!regno)
1584 return true;
1585 for (i = 0; i < REGPARM_MAX; i++)
1586 if (regno == x86_64_int_parameter_registers[i])
1587 return true;
1588 return false;
1591 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1592 for a call to a function whose data type is FNTYPE.
1593 For a library call, FNTYPE is 0. */
1595 void
1596 init_cumulative_args (cum, fntype, libname)
1597 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1598 tree fntype; /* tree ptr for function decl */
1599 rtx libname; /* SYMBOL_REF of library name or 0 */
1601 static CUMULATIVE_ARGS zero_cum;
1602 tree param, next_param;
1604 if (TARGET_DEBUG_ARG)
1606 fprintf (stderr, "\ninit_cumulative_args (");
1607 if (fntype)
1608 fprintf (stderr, "fntype code = %s, ret code = %s",
1609 tree_code_name[(int) TREE_CODE (fntype)],
1610 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1611 else
1612 fprintf (stderr, "no fntype");
1614 if (libname)
1615 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1618 *cum = zero_cum;
1620 /* Set up the number of registers to use for passing arguments. */
1621 cum->nregs = ix86_regparm;
1622 cum->sse_nregs = SSE_REGPARM_MAX;
1623 if (fntype && !TARGET_64BIT)
1625 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1627 if (attr)
1628 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1630 cum->maybe_vaarg = false;
1632 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1633 if (fntype && !TARGET_64BIT)
1635 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1637 cum->nregs = 2;
1638 cum->fastcall = 1;
1642 /* Determine whether this function takes variable arguments. A
1643 prototyped argument list ends with 'void_type_node' when there
1644 are no variable arguments. If there are variable arguments,
1645 we won't pass anything in registers. */
1647 if (cum->nregs)
1649 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1650 param != 0; param = next_param)
1652 next_param = TREE_CHAIN (param);
1653 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1655 if (!TARGET_64BIT)
1657 cum->nregs = 0;
1658 cum->fastcall = 0;
1660 cum->maybe_vaarg = true;
1664 if ((!fntype && !libname)
1665 || (fntype && !TYPE_ARG_TYPES (fntype)))
1666 cum->maybe_vaarg = 1;
1668 if (TARGET_DEBUG_ARG)
1669 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1671 return;
1674 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1675 The goal of this code is to classify each eightbyte of the incoming argument
1676 by register class and assign registers accordingly. */
1678 /* Return the union class of CLASS1 and CLASS2.
1679 See the x86-64 PS ABI for details. */
1681 static enum x86_64_reg_class
1682 merge_classes (class1, class2)
1683 enum x86_64_reg_class class1, class2;
1685 /* Rule #1: If both classes are equal, this is the resulting class. */
1686 if (class1 == class2)
1687 return class1;
1689 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1690 the other class. */
1691 if (class1 == X86_64_NO_CLASS)
1692 return class2;
1693 if (class2 == X86_64_NO_CLASS)
1694 return class1;
1696 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1697 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1698 return X86_64_MEMORY_CLASS;
1700 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1701 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1702 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1703 return X86_64_INTEGERSI_CLASS;
1704 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1705 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1706 return X86_64_INTEGER_CLASS;
1708 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1709 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1710 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1711 return X86_64_MEMORY_CLASS;
1713 /* Rule #6: Otherwise class SSE is used. */
1714 return X86_64_SSE_CLASS;
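/* Quick sanity check of the rules above (annotation, not part of the
   original source): merging X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS by rule #4, while
   merging X86_64_SSEDF_CLASS with X86_64_X87_CLASS yields
   X86_64_MEMORY_CLASS by rule #5.  */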
1717 /* Classify the argument of type TYPE and mode MODE.
1718 CLASSES will be filled by the register class used to pass each word
1719 of the operand. The number of words is returned. In case the parameter
1720 should be passed in memory, 0 is returned. As a special case for zero
1721 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1723 BIT_OFFSET is used internally for handling records; it specifies the
1724 offset of the current piece in bits, modulo 256, to avoid overflow cases.
1726 See the x86-64 PS ABI for details.
1729 static int
1730 classify_argument (mode, type, classes, bit_offset)
1731 enum machine_mode mode;
1732 tree type;
1733 enum x86_64_reg_class classes[MAX_CLASSES];
1734 int bit_offset;
1736 int bytes =
1737 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1738 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1740 /* Variable sized entities are always passed/returned in memory. */
1741 if (bytes < 0)
1742 return 0;
1744 if (type && AGGREGATE_TYPE_P (type))
1746 int i;
1747 tree field;
1748 enum x86_64_reg_class subclasses[MAX_CLASSES];
1750 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1751 if (bytes > 16)
1752 return 0;
1754 for (i = 0; i < words; i++)
1755 classes[i] = X86_64_NO_CLASS;
1757 /* Zero sized arrays or structures are NO_CLASS. Since a return value
1758 of 0 signals the memory class, handle them as a special case. */
1759 if (!words)
1761 classes[0] = X86_64_NO_CLASS;
1762 return 1;
1765 /* Classify each field of record and merge classes. */
1766 if (TREE_CODE (type) == RECORD_TYPE)
1768 /* For C++ classes, first merge in the fields of the base classes. */
1769 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1771 tree bases = TYPE_BINFO_BASETYPES (type);
1772 int n_bases = TREE_VEC_LENGTH (bases);
1773 int i;
1775 for (i = 0; i < n_bases; ++i)
1777 tree binfo = TREE_VEC_ELT (bases, i);
1778 int num;
1779 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1780 tree type = BINFO_TYPE (binfo);
1782 num = classify_argument (TYPE_MODE (type),
1783 type, subclasses,
1784 (offset + bit_offset) % 256);
1785 if (!num)
1786 return 0;
1787 for (i = 0; i < num; i++)
1789 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1790 classes[i + pos] =
1791 merge_classes (subclasses[i], classes[i + pos]);
1795 /* And now merge the fields of the structure. */
1796 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1798 if (TREE_CODE (field) == FIELD_DECL)
1800 int num;
1802 /* Bitfields are always classified as integer. Handle them
1803 early, since later code would consider them to be
1804 misaligned integers. */
1805 if (DECL_BIT_FIELD (field))
1807 for (i = int_bit_position (field) / 8 / 8;
1808 i < (int_bit_position (field)
1809 + tree_low_cst (DECL_SIZE (field), 0)
1810 + 63) / 8 / 8; i++)
1811 classes[i] =
1812 merge_classes (X86_64_INTEGER_CLASS,
1813 classes[i]);
1815 else
1817 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1818 TREE_TYPE (field), subclasses,
1819 (int_bit_position (field)
1820 + bit_offset) % 256);
1821 if (!num)
1822 return 0;
1823 for (i = 0; i < num; i++)
1825 int pos =
1826 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1827 classes[i + pos] =
1828 merge_classes (subclasses[i], classes[i + pos]);
1834 /* Arrays are handled as small records. */
1835 else if (TREE_CODE (type) == ARRAY_TYPE)
1837 int num;
1838 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1839 TREE_TYPE (type), subclasses, bit_offset);
1840 if (!num)
1841 return 0;
1843 /* The partial classes are now full classes. */
1844 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1845 subclasses[0] = X86_64_SSE_CLASS;
1846 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1847 subclasses[0] = X86_64_INTEGER_CLASS;
1849 for (i = 0; i < words; i++)
1850 classes[i] = subclasses[i % num];
1852 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1853 else if (TREE_CODE (type) == UNION_TYPE
1854 || TREE_CODE (type) == QUAL_UNION_TYPE)
1856 /* For C++ classes, first merge in the fields of the base classes. */
1857 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1859 tree bases = TYPE_BINFO_BASETYPES (type);
1860 int n_bases = TREE_VEC_LENGTH (bases);
1861 int i;
1863 for (i = 0; i < n_bases; ++i)
1865 tree binfo = TREE_VEC_ELT (bases, i);
1866 int num;
1867 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1868 tree type = BINFO_TYPE (binfo);
1870 num = classify_argument (TYPE_MODE (type),
1871 type, subclasses,
1872 (offset + (bit_offset % 64)) % 256);
1873 if (!num)
1874 return 0;
1875 for (i = 0; i < num; i++)
1877 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1878 classes[i + pos] =
1879 merge_classes (subclasses[i], classes[i + pos]);
1883 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1885 if (TREE_CODE (field) == FIELD_DECL)
1887 int num;
1888 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1889 TREE_TYPE (field), subclasses,
1890 bit_offset);
1891 if (!num)
1892 return 0;
1893 for (i = 0; i < num; i++)
1894 classes[i] = merge_classes (subclasses[i], classes[i]);
1898 else
1899 abort ();
1901 /* Final merger cleanup. */
1902 for (i = 0; i < words; i++)
1904 /* If one class is MEMORY, everything should be passed in
1905 memory. */
1906 if (classes[i] == X86_64_MEMORY_CLASS)
1907 return 0;
1909 /* The X86_64_SSEUP_CLASS should always be preceded by
1910 X86_64_SSE_CLASS. */
1911 if (classes[i] == X86_64_SSEUP_CLASS
1912 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1913 classes[i] = X86_64_SSE_CLASS;
1915 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1916 if (classes[i] == X86_64_X87UP_CLASS
1917 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1918 classes[i] = X86_64_SSE_CLASS;
1920 return words;
1923 /* Compute the alignment needed. We align all types to their natural
1924 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
1925 if (mode != VOIDmode && mode != BLKmode)
1927 int mode_alignment = GET_MODE_BITSIZE (mode);
1929 if (mode == XFmode)
1930 mode_alignment = 128;
1931 else if (mode == XCmode)
1932 mode_alignment = 256;
1933 /* Misaligned fields are always returned in memory. */
1934 if (bit_offset % mode_alignment)
1935 return 0;
1938 /* Classification of atomic types. */
1939 switch (mode)
1941 case DImode:
1942 case SImode:
1943 case HImode:
1944 case QImode:
1945 case CSImode:
1946 case CHImode:
1947 case CQImode:
1948 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1949 classes[0] = X86_64_INTEGERSI_CLASS;
1950 else
1951 classes[0] = X86_64_INTEGER_CLASS;
1952 return 1;
1953 case CDImode:
1954 case TImode:
1955 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1956 return 2;
1957 case CTImode:
1958 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1959 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1960 return 4;
1961 case SFmode:
1962 if (!(bit_offset % 64))
1963 classes[0] = X86_64_SSESF_CLASS;
1964 else
1965 classes[0] = X86_64_SSE_CLASS;
1966 return 1;
1967 case DFmode:
1968 classes[0] = X86_64_SSEDF_CLASS;
1969 return 1;
1970 case TFmode:
1971 classes[0] = X86_64_X87_CLASS;
1972 classes[1] = X86_64_X87UP_CLASS;
1973 return 2;
1974 case TCmode:
1975 classes[0] = X86_64_X87_CLASS;
1976 classes[1] = X86_64_X87UP_CLASS;
1977 classes[2] = X86_64_X87_CLASS;
1978 classes[3] = X86_64_X87UP_CLASS;
1979 return 4;
1980 case DCmode:
1981 classes[0] = X86_64_SSEDF_CLASS;
1982 classes[1] = X86_64_SSEDF_CLASS;
1983 return 2;
1984 case SCmode:
1985 classes[0] = X86_64_SSE_CLASS;
1986 return 1;
1987 case V4SFmode:
1988 case V4SImode:
1989 case V16QImode:
1990 case V8HImode:
1991 case V2DFmode:
1992 case V2DImode:
1993 classes[0] = X86_64_SSE_CLASS;
1994 classes[1] = X86_64_SSEUP_CLASS;
1995 return 2;
1996 case V2SFmode:
1997 case V2SImode:
1998 case V4HImode:
1999 case V8QImode:
2000 return 0;
2001 case BLKmode:
2002 case VOIDmode:
2003 return 0;
2004 default:
2005 abort ();
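/* Worked example for the classification above (annotation, not part
   of the original source):

     struct s { double d; int i; };   -- 16 bytes, two eightbytes

   yields a return value of 2 with classes[0] = X86_64_SSEDF_CLASS
   (the double) and classes[1] = X86_64_INTEGER_CLASS (the int, whose
   bit offset of 64 disqualifies it from INTEGERSI), so the struct is
   passed in one SSE register and one integer register.  */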
2009 /* Examine the argument and set the number of registers required in each
2010 class. Return 0 iff the parameter should be passed in memory. */
2011 static int
2012 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2013 enum machine_mode mode;
2014 tree type;
2015 int *int_nregs, *sse_nregs;
2016 int in_return;
2018 enum x86_64_reg_class class[MAX_CLASSES];
2019 int n = classify_argument (mode, type, class, 0);
2021 *int_nregs = 0;
2022 *sse_nregs = 0;
2023 if (!n)
2024 return 0;
2025 for (n--; n >= 0; n--)
2026 switch (class[n])
2028 case X86_64_INTEGER_CLASS:
2029 case X86_64_INTEGERSI_CLASS:
2030 (*int_nregs)++;
2031 break;
2032 case X86_64_SSE_CLASS:
2033 case X86_64_SSESF_CLASS:
2034 case X86_64_SSEDF_CLASS:
2035 (*sse_nregs)++;
2036 break;
2037 case X86_64_NO_CLASS:
2038 case X86_64_SSEUP_CLASS:
2039 break;
2040 case X86_64_X87_CLASS:
2041 case X86_64_X87UP_CLASS:
2042 if (!in_return)
2043 return 0;
2044 break;
2045 case X86_64_MEMORY_CLASS:
2046 abort ();
2048 return 1;
2050 /* Construct a container for the argument, as used by the GCC calling
2051 interface. See FUNCTION_ARG for the detailed description. */
2052 static rtx
2053 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2054 enum machine_mode mode;
2055 tree type;
2056 int in_return;
2057 int nintregs, nsseregs;
2058 const int * intreg;
2059 int sse_regno;
2061 enum machine_mode tmpmode;
2062 int bytes =
2063 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2064 enum x86_64_reg_class class[MAX_CLASSES];
2065 int n;
2066 int i;
2067 int nexps = 0;
2068 int needed_sseregs, needed_intregs;
2069 rtx exp[MAX_CLASSES];
2070 rtx ret;
2072 n = classify_argument (mode, type, class, 0);
2073 if (TARGET_DEBUG_ARG)
2075 if (!n)
2076 fprintf (stderr, "Memory class\n");
2077 else
2079 fprintf (stderr, "Classes:");
2080 for (i = 0; i < n; i++)
2082 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2084 fprintf (stderr, "\n");
2087 if (!n)
2088 return NULL;
2089 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2090 return NULL;
2091 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2092 return NULL;
2094 /* First construct the simple cases. Avoid SCmode, since we want to use
2095 a single register to pass this type. */
2096 if (n == 1 && mode != SCmode)
2097 switch (class[0])
2099 case X86_64_INTEGER_CLASS:
2100 case X86_64_INTEGERSI_CLASS:
2101 return gen_rtx_REG (mode, intreg[0]);
2102 case X86_64_SSE_CLASS:
2103 case X86_64_SSESF_CLASS:
2104 case X86_64_SSEDF_CLASS:
2105 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2106 case X86_64_X87_CLASS:
2107 return gen_rtx_REG (mode, FIRST_STACK_REG);
2108 case X86_64_NO_CLASS:
2109 /* Zero sized array, struct or class. */
2110 return NULL;
2111 default:
2112 abort ();
2114 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2115 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2116 if (n == 2
2117 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2118 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2119 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2120 && class[1] == X86_64_INTEGER_CLASS
2121 && (mode == CDImode || mode == TImode)
2122 && intreg[0] + 1 == intreg[1])
2123 return gen_rtx_REG (mode, intreg[0]);
2124 if (n == 4
2125 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2126 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2127 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2129 /* Otherwise figure out the entries of the PARALLEL. */
2130 for (i = 0; i < n; i++)
2132 switch (class[i])
2134 case X86_64_NO_CLASS:
2135 break;
2136 case X86_64_INTEGER_CLASS:
2137 case X86_64_INTEGERSI_CLASS:
2138 /* Merge TImodes on aligned occasions here too. */
2139 if (i * 8 + 8 > bytes)
2140 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2141 else if (class[i] == X86_64_INTEGERSI_CLASS)
2142 tmpmode = SImode;
2143 else
2144 tmpmode = DImode;
2145 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2146 if (tmpmode == BLKmode)
2147 tmpmode = DImode;
2148 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2149 gen_rtx_REG (tmpmode, *intreg),
2150 GEN_INT (i*8));
2151 intreg++;
2152 break;
2153 case X86_64_SSESF_CLASS:
2154 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2155 gen_rtx_REG (SFmode,
2156 SSE_REGNO (sse_regno)),
2157 GEN_INT (i*8));
2158 sse_regno++;
2159 break;
2160 case X86_64_SSEDF_CLASS:
2161 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2162 gen_rtx_REG (DFmode,
2163 SSE_REGNO (sse_regno)),
2164 GEN_INT (i*8));
2165 sse_regno++;
2166 break;
2167 case X86_64_SSE_CLASS:
2168 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2169 tmpmode = TImode;
2170 else
2171 tmpmode = DImode;
2172 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2173 gen_rtx_REG (tmpmode,
2174 SSE_REGNO (sse_regno)),
2175 GEN_INT (i*8));
2176 if (tmpmode == TImode)
2177 i++;
2178 sse_regno++;
2179 break;
2180 default:
2181 abort ();
2184 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2185 for (i = 0; i < nexps; i++)
2186 XVECEXP (ret, 0, i) = exp [i];
2187 return ret;
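/* Continuing the struct s { double d; int i; } example from above
   (annotation, not part of the original source): assuming it is the
   first argument of a function, construct_container builds

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, pairing a hard register with the
   byte offset of that chunk within the argument.  */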
2190 /* Update the data in CUM to advance over an argument
2191 of mode MODE and data type TYPE.
2192 (TYPE is null for libcalls where that information may not be available.) */
2194 void
2195 function_arg_advance (cum, mode, type, named)
2196 CUMULATIVE_ARGS *cum; /* current arg information */
2197 enum machine_mode mode; /* current arg mode */
2198 tree type; /* type of the argument or 0 if lib support */
2199 int named; /* whether or not the argument was named */
2201 int bytes =
2202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2203 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2205 if (TARGET_DEBUG_ARG)
2206 fprintf (stderr,
2207 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2208 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2209 if (TARGET_64BIT)
2211 int int_nregs, sse_nregs;
2212 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2213 cum->words += words;
2214 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2216 cum->nregs -= int_nregs;
2217 cum->sse_nregs -= sse_nregs;
2218 cum->regno += int_nregs;
2219 cum->sse_regno += sse_nregs;
2221 else
2222 cum->words += words;
2224 else
2226 if (TARGET_SSE && mode == TImode)
2228 cum->sse_words += words;
2229 cum->sse_nregs -= 1;
2230 cum->sse_regno += 1;
2231 if (cum->sse_nregs <= 0)
2233 cum->sse_nregs = 0;
2234 cum->sse_regno = 0;
2237 else
2239 cum->words += words;
2240 cum->nregs -= words;
2241 cum->regno += words;
2243 if (cum->nregs <= 0)
2245 cum->nregs = 0;
2246 cum->regno = 0;
2250 return;
2253 /* Define where to put the arguments to a function.
2254 Value is zero to push the argument on the stack,
2255 or a hard register in which to store the argument.
2257 MODE is the argument's machine mode.
2258 TYPE is the data type of the argument (as a tree).
2259 This is null for libcalls where that information may
2260 not be available.
2261 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2262 the preceding args and about the function being called.
2263 NAMED is nonzero if this argument is a named parameter
2264 (otherwise it is an extra parameter matching an ellipsis). */
2266 rtx
2267 function_arg (cum, mode, type, named)
2268 CUMULATIVE_ARGS *cum; /* current arg information */
2269 enum machine_mode mode; /* current arg mode */
2270 tree type; /* type of the argument or 0 if lib support */
2271 int named; /* != 0 for normal args, == 0 for ... args */
2273 rtx ret = NULL_RTX;
2274 int bytes =
2275 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2276 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2278 /* Handle a hidden AL argument containing the number of SSE registers
2279 for varargs x86-64 functions. For the i386 ABI, just return
2280 constm1_rtx to avoid any AL settings. */
2281 if (mode == VOIDmode)
2283 if (TARGET_64BIT)
2284 return GEN_INT (cum->maybe_vaarg
2285 ? (cum->sse_nregs < 0
2286 ? SSE_REGPARM_MAX
2287 : cum->sse_regno)
2288 : -1);
2289 else
2290 return constm1_rtx;
2292 if (TARGET_64BIT)
2293 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2294 &x86_64_int_parameter_registers [cum->regno],
2295 cum->sse_regno);
2296 else
2297 switch (mode)
2299 /* For now, pass fp/complex values on the stack. */
2300 default:
2301 break;
2303 case BLKmode:
2304 if (bytes < 0)
2305 break;
2306 /* FALLTHRU */
2307 case DImode:
2308 case SImode:
2309 case HImode:
2310 case QImode:
2311 if (words <= cum->nregs)
2313 int regno = cum->regno;
2315 /* Fastcall allocates the first two DWORD (SImode) or
2316 smaller arguments to ECX and EDX. */
2317 if (cum->fastcall)
2319 if (mode == BLKmode || mode == DImode)
2320 break;
2322 /* ECX, not EAX, is the first allocated register. */
2323 if (regno == 0)
2324 regno = 2;
2326 ret = gen_rtx_REG (mode, regno);
2328 break;
2329 case TImode:
2330 if (cum->sse_nregs)
2331 ret = gen_rtx_REG (mode, cum->sse_regno);
2332 break;
2335 if (TARGET_DEBUG_ARG)
2337 fprintf (stderr,
2338 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2339 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2341 if (ret)
2342 print_simple_rtl (stderr, ret);
2343 else
2344 fprintf (stderr, ", stack");
2346 fprintf (stderr, " )\n");
2349 return ret;
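/* Fastcall example (annotation, not part of the original source): for

     void __attribute__ ((fastcall)) h (int a, int b, int c);

   the code above places 'a' in ECX (regno 2 replaces regno 0) and
   'b' in EDX, while 'c' goes on the stack once cum->nregs is
   exhausted.  'h' is a hypothetical name.  */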
2352 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2353 passing ABI. */
2354 static bool
2355 contains_128bit_aligned_vector_p (type)
2356 tree type;
2358 enum machine_mode mode = TYPE_MODE (type);
2359 if (SSE_REG_MODE_P (mode)
2360 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2361 return true;
2362 if (TYPE_ALIGN (type) < 128)
2363 return false;
2365 if (AGGREGATE_TYPE_P (type))
2367 /* Walk the aggregates recursively. */
2368 if (TREE_CODE (type) == RECORD_TYPE
2369 || TREE_CODE (type) == UNION_TYPE
2370 || TREE_CODE (type) == QUAL_UNION_TYPE)
2372 tree field;
2374 if (TYPE_BINFO (type) != NULL
2375 && TYPE_BINFO_BASETYPES (type) != NULL)
2377 tree bases = TYPE_BINFO_BASETYPES (type);
2378 int n_bases = TREE_VEC_LENGTH (bases);
2379 int i;
2381 for (i = 0; i < n_bases; ++i)
2383 tree binfo = TREE_VEC_ELT (bases, i);
2384 tree type = BINFO_TYPE (binfo);
2386 if (contains_128bit_aligned_vector_p (type))
2387 return true;
2390 /* And now merge the fields of the structure. */
2391 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2393 if (TREE_CODE (field) == FIELD_DECL
2394 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2395 return true;
2398 /* Just in case some language passes arrays by value. */
2399 else if (TREE_CODE (type) == ARRAY_TYPE)
2401 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2402 return true;
2404 else
2405 abort ();
2407 return false;
2410 /* A C expression that indicates when an argument must be passed by
2411 reference. If nonzero for an argument, a copy of that argument is
2412 made in memory and a pointer to the argument is passed instead of
2413 the argument itself. The pointer is passed in whatever way is
2414 appropriate for passing a pointer to that type. */
2417 function_arg_pass_by_reference (cum, mode, type, named)
2418 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2419 enum machine_mode mode ATTRIBUTE_UNUSED;
2420 tree type;
2421 int named ATTRIBUTE_UNUSED;
2423 if (!TARGET_64BIT)
2424 return 0;
2426 if (type && int_size_in_bytes (type) == -1)
2428 if (TARGET_DEBUG_ARG)
2429 fprintf (stderr, "function_arg_pass_by_reference\n");
2430 return 1;
2433 return 0;
2436 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2437 and type. */
2440 ix86_function_arg_boundary (mode, type)
2441 enum machine_mode mode;
2442 tree type;
2444 int align;
2445 if (type)
2446 align = TYPE_ALIGN (type);
2447 else
2448 align = GET_MODE_ALIGNMENT (mode);
2449 if (align < PARM_BOUNDARY)
2450 align = PARM_BOUNDARY;
2451 if (!TARGET_64BIT)
2453 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2454 make an exception for SSE modes since these require 128bit
2455 alignment.
2457 The handling here differs from field_alignment. ICC aligns MMX
2458 arguments to 4 byte boundaries, while structure fields are aligned
2459 to 8 byte boundaries. */
2460 if (!type)
2462 if (!SSE_REG_MODE_P (mode))
2463 align = PARM_BOUNDARY;
2465 else
2467 if (!contains_128bit_aligned_vector_p (type))
2468 align = PARM_BOUNDARY;
2470 if (align != PARM_BOUNDARY && !TARGET_SSE)
2471 abort();
2473 if (align > 128)
2474 align = 128;
2475 return align;
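/* Example of the 32-bit boundary logic above (annotation, not part of
   the original source): a plain 'int' argument is aligned to
   PARM_BOUNDARY (32 bits), while an SSE vector such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   keeps its 128-bit alignment, which is what the
   contains_128bit_aligned_vector_p walk detects inside aggregates.  */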
2478 /* Return true if N is a possible register number of function value. */
2479 bool
2480 ix86_function_value_regno_p (regno)
2481 int regno;
2483 if (!TARGET_64BIT)
2485 return ((regno) == 0
2486 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2487 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2489 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2490 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2491 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2494 /* Define how to find the value returned by a function.
2495 VALTYPE is the data type of the value (as a tree).
2496 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2497 otherwise, FUNC is 0. */
2498 rtx
2499 ix86_function_value (valtype)
2500 tree valtype;
2502 if (TARGET_64BIT)
2504 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2505 REGPARM_MAX, SSE_REGPARM_MAX,
2506 x86_64_int_return_registers, 0);
2507 /* For zero sized structures, construct_container returns NULL, but we need
2508 to keep the rest of the compiler happy by returning a meaningful value. */
2509 if (!ret)
2510 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2511 return ret;
2513 else
2514 return gen_rtx_REG (TYPE_MODE (valtype),
2515 ix86_value_regno (TYPE_MODE (valtype)));
2518 /* Return nonzero iff TYPE is returned in memory. */
2520 ix86_return_in_memory (type)
2521 tree type;
2523 int needed_intregs, needed_sseregs, size;
2524 enum machine_mode mode = TYPE_MODE (type);
2526 if (TARGET_64BIT)
2527 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2529 if (mode == BLKmode)
2530 return 1;
2532 size = int_size_in_bytes (type);
2534 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2535 return 0;
2537 if (VECTOR_MODE_P (mode) || mode == TImode)
2539 /* User-created vectors small enough to fit in EAX. */
2540 if (size < 8)
2541 return 0;
2543 /* MMX/3dNow values are returned on the stack, since we've
2544 got to EMMS/FEMMS before returning. */
2545 if (size == 8)
2546 return 1;
2548 /* SSE values are returned in XMM0. */
2549 /* ??? Except when it doesn't exist? We have a choice of
2550 either (1) being ABI incompatible with a -march switch,
2551 or (2) generating an error here. Given no good solution,
2552 I think the safest thing is one warning. The user won't
2553 be able to use -Werror, but... */
2554 if (size == 16)
2556 static bool warned;
2558 if (TARGET_SSE)
2559 return 0;
2561 if (!warned)
2563 warned = true;
2564 warning ("SSE vector return without SSE enabled "
2565 "changes the ABI");
2567 return 1;
2571 if (mode == TFmode)
2572 return 0;
2573 if (size > 12)
2574 return 1;
2575 return 0;
2578 /* Define how to find the value returned by a library function
2579 assuming the value has mode MODE. */
2580 rtx
2581 ix86_libcall_value (mode)
2582 enum machine_mode mode;
2584 if (TARGET_64BIT)
2586 switch (mode)
2588 case SFmode:
2589 case SCmode:
2590 case DFmode:
2591 case DCmode:
2592 return gen_rtx_REG (mode, FIRST_SSE_REG);
2593 case TFmode:
2594 case TCmode:
2595 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2596 default:
2597 return gen_rtx_REG (mode, 0);
2600 else
2601 return gen_rtx_REG (mode, ix86_value_regno (mode));
2604 /* Given a mode, return the register to use for a return value. */
2606 static int
2607 ix86_value_regno (mode)
2608 enum machine_mode mode;
2610 /* Floating point return values in %st(0). */
2611 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2612 return FIRST_FLOAT_REG;
2613 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2614 we prevent this case when sse is not available. */
2615 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2616 return FIRST_SSE_REG;
2617 /* Everything else in %eax. */
2618 return 0;
2621 /* Create the va_list data type. */
2623 tree
2624 ix86_build_va_list ()
2626 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2628 /* For i386 we use a plain pointer to the argument area. */
2629 if (!TARGET_64BIT)
2630 return build_pointer_type (char_type_node);
2632 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2633 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2635 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2636 unsigned_type_node);
2637 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2638 unsigned_type_node);
2639 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2640 ptr_type_node);
2641 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2642 ptr_type_node);
2644 DECL_FIELD_CONTEXT (f_gpr) = record;
2645 DECL_FIELD_CONTEXT (f_fpr) = record;
2646 DECL_FIELD_CONTEXT (f_ovf) = record;
2647 DECL_FIELD_CONTEXT (f_sav) = record;
2649 TREE_CHAIN (record) = type_decl;
2650 TYPE_NAME (record) = type_decl;
2651 TYPE_FIELDS (record) = f_gpr;
2652 TREE_CHAIN (f_gpr) = f_fpr;
2653 TREE_CHAIN (f_fpr) = f_ovf;
2654 TREE_CHAIN (f_ovf) = f_sav;
2656 layout_type (record);
2658 /* The correct type is an array type of one element. */
2659 return build_array_type (record, build_index_type (size_zero_node));
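/* The record built above corresponds to the C-level type mandated by
   the x86-64 ABI (shown here for illustration only):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   The one-element array type makes va_list decay to a pointer when
   passed to a function, as the ABI requires.  */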
2662 /* Perform any actions needed for a function that is receiving a
2663 variable number of arguments.
2665 CUM is as above.
2667 MODE and TYPE are the mode and type of the current parameter.
2669 PRETEND_SIZE is a variable that should be set to the amount of stack
2670 that must be pushed by the prologue to pretend that our caller pushed it.
2673 Normally, this macro will push all remaining incoming registers on the
2674 stack and set PRETEND_SIZE to the length of the registers pushed. */
2676 void
2677 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2678 CUMULATIVE_ARGS *cum;
2679 enum machine_mode mode;
2680 tree type;
2681 int *pretend_size ATTRIBUTE_UNUSED;
2682 int no_rtl;
2685 CUMULATIVE_ARGS next_cum;
2686 rtx save_area = NULL_RTX, mem;
2687 rtx label;
2688 rtx label_ref;
2689 rtx tmp_reg;
2690 rtx nsse_reg;
2691 int set;
2692 tree fntype;
2693 int stdarg_p;
2694 int i;
2696 if (!TARGET_64BIT)
2697 return;
2699 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2700 ix86_save_varrargs_registers = 1;
2702 cfun->stack_alignment_needed = 128;
2704 fntype = TREE_TYPE (current_function_decl);
2705 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2706 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2707 != void_type_node));
2709 /* For varargs, we do not want to skip the dummy va_dcl argument.
2710 For stdargs, we do want to skip the last named argument. */
2711 next_cum = *cum;
2712 if (stdarg_p)
2713 function_arg_advance (&next_cum, mode, type, 1);
2715 if (!no_rtl)
2716 save_area = frame_pointer_rtx;
2718 set = get_varargs_alias_set ();
2720 for (i = next_cum.regno; i < ix86_regparm; i++)
2722 mem = gen_rtx_MEM (Pmode,
2723 plus_constant (save_area, i * UNITS_PER_WORD));
2724 set_mem_alias_set (mem, set);
2725 emit_move_insn (mem, gen_rtx_REG (Pmode,
2726 x86_64_int_parameter_registers[i]));
2729 if (next_cum.sse_nregs)
2731 /* Now emit code to save the SSE registers. The AX parameter contains the
2732 number of SSE parameter registers used to call this function. We use
2733 the sse_prologue_save insn template, which produces a computed jump
2734 across the SSE saves. We need some preparation work to get this working. */
2736 label = gen_label_rtx ();
2737 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2739 /* Compute the address to jump to:
2740 label - 4*eax + nnamed_sse_arguments*4. */
2741 tmp_reg = gen_reg_rtx (Pmode);
2742 nsse_reg = gen_reg_rtx (Pmode);
2743 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2744 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2745 gen_rtx_MULT (Pmode, nsse_reg,
2746 GEN_INT (4))));
2747 if (next_cum.sse_regno)
2748 emit_move_insn
2749 (nsse_reg,
2750 gen_rtx_CONST (DImode,
2751 gen_rtx_PLUS (DImode,
2752 label_ref,
2753 GEN_INT (next_cum.sse_regno * 4))));
2754 else
2755 emit_move_insn (nsse_reg, label_ref);
2756 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2758 /* Compute the address of the memory block we save into. We always use
2759 a pointer pointing 127 bytes after the first byte to store - this is
2760 needed to keep the instruction size limited to 4 bytes. */
2761 tmp_reg = gen_reg_rtx (Pmode);
2762 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2763 plus_constant (save_area,
2764 8 * REGPARM_MAX + 127)));
2765 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2766 set_mem_alias_set (mem, set);
2767 set_mem_align (mem, BITS_PER_WORD);
2769 /* And finally do the dirty job! */
2770 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2771 GEN_INT (next_cum.sse_regno), label));
2776 /* Implement va_start. */
2778 void
2779 ix86_va_start (valist, nextarg)
2780 tree valist;
2781 rtx nextarg;
2783 HOST_WIDE_INT words, n_gpr, n_fpr;
2784 tree f_gpr, f_fpr, f_ovf, f_sav;
2785 tree gpr, fpr, ovf, sav, t;
2787 /* Only the 64-bit target needs anything special. */
2788 if (!TARGET_64BIT)
2790 std_expand_builtin_va_start (valist, nextarg);
2791 return;
2794 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2795 f_fpr = TREE_CHAIN (f_gpr);
2796 f_ovf = TREE_CHAIN (f_fpr);
2797 f_sav = TREE_CHAIN (f_ovf);
2799 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2800 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2801 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2802 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2803 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2805 /* Count number of gp and fp argument registers used. */
2806 words = current_function_args_info.words;
2807 n_gpr = current_function_args_info.regno;
2808 n_fpr = current_function_args_info.sse_regno;
2810 if (TARGET_DEBUG_ARG)
2811 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2812 (int) words, (int) n_gpr, (int) n_fpr);
2814 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2815 build_int_2 (n_gpr * 8, 0));
2816 TREE_SIDE_EFFECTS (t) = 1;
2817 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2819 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2820 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2821 TREE_SIDE_EFFECTS (t) = 1;
2822 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2824 /* Find the overflow area. */
2825 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2826 if (words != 0)
2827 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2828 build_int_2 (words * UNITS_PER_WORD, 0));
2829 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2830 TREE_SIDE_EFFECTS (t) = 1;
2831 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2833 /* Find the register save area.
2834 The function prologue saves it right above the stack frame. */
2835 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2836 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2837 TREE_SIDE_EFFECTS (t) = 1;
2838 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
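/* Worked example (annotation, not part of the original source): for
   'int log_msg (const char *fmt, ...)' one integer register is
   consumed by FMT, so va_start sets gp_offset = 8 and
   fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48, points
   overflow_arg_area at the first stack argument and reg_save_area at
   the block saved by the prologue.  'log_msg' is a hypothetical
   name.  */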
2841 /* Implement va_arg. */
2842 rtx
2843 ix86_va_arg (valist, type)
2844 tree valist, type;
2846 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2847 tree f_gpr, f_fpr, f_ovf, f_sav;
2848 tree gpr, fpr, ovf, sav, t;
2849 int size, rsize;
2850 rtx lab_false, lab_over = NULL_RTX;
2851 rtx addr_rtx, r;
2852 rtx container;
2853 int indirect_p = 0;
2855 /* Only the 64-bit target needs anything special. */
2856 if (!TARGET_64BIT)
2858 return std_expand_builtin_va_arg (valist, type);
2861 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2862 f_fpr = TREE_CHAIN (f_gpr);
2863 f_ovf = TREE_CHAIN (f_fpr);
2864 f_sav = TREE_CHAIN (f_ovf);
2866 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2867 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2868 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2869 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2870 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2872 size = int_size_in_bytes (type);
2873 if (size == -1)
2875 /* Passed by reference. */
2876 indirect_p = 1;
2877 type = build_pointer_type (type);
2878 size = int_size_in_bytes (type);
2880 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2882 container = construct_container (TYPE_MODE (type), type, 0,
2883 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2885 /* Pull the value out of the saved registers ...  */
2888 addr_rtx = gen_reg_rtx (Pmode);
2890 if (container)
2892 rtx int_addr_rtx, sse_addr_rtx;
2893 int needed_intregs, needed_sseregs;
2894 int need_temp;
2896 lab_over = gen_label_rtx ();
2897 lab_false = gen_label_rtx ();
2899 examine_argument (TYPE_MODE (type), type, 0,
2900 &needed_intregs, &needed_sseregs);
2903 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2904 || TYPE_ALIGN (type) > 128);
2906 /* In case we are passing a structure, verify that it occupies a consecutive
2907 block of the register save area. If not, we need to do moves. */
2908 if (!need_temp && !REG_P (container))
2910 /* Verify that all registers are strictly consecutive. */
2911 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2913 int i;
2915 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2917 rtx slot = XVECEXP (container, 0, i);
2918 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2919 || INTVAL (XEXP (slot, 1)) != i * 16)
2920 need_temp = 1;
2923 else
2925 int i;
2927 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2929 rtx slot = XVECEXP (container, 0, i);
2930 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2931 || INTVAL (XEXP (slot, 1)) != i * 8)
2932 need_temp = 1;
2936 if (!need_temp)
2938 int_addr_rtx = addr_rtx;
2939 sse_addr_rtx = addr_rtx;
2941 else
2943 int_addr_rtx = gen_reg_rtx (Pmode);
2944 sse_addr_rtx = gen_reg_rtx (Pmode);
2946 /* First ensure that we fit completely in registers. */
2947 if (needed_intregs)
2949 emit_cmp_and_jump_insns (expand_expr
2950 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2951 GEN_INT ((REGPARM_MAX - needed_intregs +
2952 1) * 8), GE, const1_rtx, SImode,
2953 1, lab_false);
2955 if (needed_sseregs)
2957 emit_cmp_and_jump_insns (expand_expr
2958 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2959 GEN_INT ((SSE_REGPARM_MAX -
2960 needed_sseregs + 1) * 16 +
2961 REGPARM_MAX * 8), GE, const1_rtx,
2962 SImode, 1, lab_false);
2965 /* Compute index to start of area used for integer regs. */
2966 if (needed_intregs)
2968 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2969 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2970 if (r != int_addr_rtx)
2971 emit_move_insn (int_addr_rtx, r);
2973 if (needed_sseregs)
2975 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2976 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2977 if (r != sse_addr_rtx)
2978 emit_move_insn (sse_addr_rtx, r);
2980 if (need_temp)
2982 int i;
2983 rtx mem;
2984 rtx x;
2986 /* Never use the memory itself, as it has the alias set. */
2987 x = XEXP (assign_temp (type, 0, 1, 0), 0);
2988 mem = gen_rtx_MEM (BLKmode, x);
2989 force_operand (x, addr_rtx);
2990 set_mem_alias_set (mem, get_varargs_alias_set ());
2991 set_mem_align (mem, BITS_PER_UNIT);
2993 for (i = 0; i < XVECLEN (container, 0); i++)
2995 rtx slot = XVECEXP (container, 0, i);
2996 rtx reg = XEXP (slot, 0);
2997 enum machine_mode mode = GET_MODE (reg);
2998 rtx src_addr;
2999 rtx src_mem;
3000 int src_offset;
3001 rtx dest_mem;
3003 if (SSE_REGNO_P (REGNO (reg)))
3005 src_addr = sse_addr_rtx;
3006 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3008 else
3010 src_addr = int_addr_rtx;
3011 src_offset = REGNO (reg) * 8;
3013 src_mem = gen_rtx_MEM (mode, src_addr);
3014 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3015 src_mem = adjust_address (src_mem, mode, src_offset);
3016 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3017 emit_move_insn (dest_mem, src_mem);
3021 if (needed_intregs)
3024 t = build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3025 build_int_2 (needed_intregs * 8, 0));
3026 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3027 TREE_SIDE_EFFECTS (t) = 1;
3028 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3030 if (needed_sseregs)
3033 t = build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3034 build_int_2 (needed_sseregs * 16, 0));
3035 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3036 TREE_SIDE_EFFECTS (t) = 1;
3037 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3040 emit_jump_insn (gen_jump (lab_over));
3041 emit_barrier ();
3042 emit_label (lab_false);
3045 /* ... otherwise out of the overflow area. */
3047 /* Care for on-stack alignment if needed. */
3048 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3049 t = ovf;
3050 else
3052 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3053 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3054 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3056 t = save_expr (t);
3058 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3059 if (r != addr_rtx)
3060 emit_move_insn (addr_rtx, r);
3063 t = build (PLUS_EXPR, TREE_TYPE (t), t,
3064 build_int_2 (rsize * UNITS_PER_WORD, 0));
3065 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3066 TREE_SIDE_EFFECTS (t) = 1;
3067 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3069 if (container)
3070 emit_label (lab_over);
3072 if (indirect_p)
3074 r = gen_rtx_MEM (Pmode, addr_rtx);
3075 set_mem_alias_set (r, get_varargs_alias_set ());
3076 emit_move_insn (addr_rtx, r);
3079 return addr_rtx;
3082 /* Return nonzero if OP is either a i387 or SSE fp register. */
3084 any_fp_register_operand (op, mode)
3085 rtx op;
3086 enum machine_mode mode ATTRIBUTE_UNUSED;
3088 return ANY_FP_REG_P (op);
3091 /* Return nonzero if OP is an i387 fp register. */
3093 fp_register_operand (op, mode)
3094 rtx op;
3095 enum machine_mode mode ATTRIBUTE_UNUSED;
3097 return FP_REG_P (op);
3100 /* Return nonzero if OP is a non-fp register_operand. */
3102 register_and_not_any_fp_reg_operand (op, mode)
3103 rtx op;
3104 enum machine_mode mode;
3106 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3109 /* Return nonzero if OP is a register operand other than an
3110 i387 fp register. */
3112 register_and_not_fp_reg_operand (op, mode)
3113 rtx op;
3114 enum machine_mode mode;
3116 return register_operand (op, mode) && !FP_REG_P (op);
3119 /* Return nonzero if OP is a general operand representable on x86_64. */
3122 x86_64_general_operand (op, mode)
3123 rtx op;
3124 enum machine_mode mode;
3126 if (!TARGET_64BIT)
3127 return general_operand (op, mode);
3128 if (nonimmediate_operand (op, mode))
3129 return 1;
3130 return x86_64_sign_extended_value (op);
3133 /* Return nonzero if OP is a general operand representable on x86_64
3134 as either a sign extended or a zero extended constant. */
3137 x86_64_szext_general_operand (op, mode)
3138 rtx op;
3139 enum machine_mode mode;
3141 if (!TARGET_64BIT)
3142 return general_operand (op, mode);
3143 if (nonimmediate_operand (op, mode))
3144 return 1;
3145 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3148 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3151 x86_64_nonmemory_operand (op, mode)
3152 rtx op;
3153 enum machine_mode mode;
3155 if (!TARGET_64BIT)
3156 return nonmemory_operand (op, mode);
3157 if (register_operand (op, mode))
3158 return 1;
3159 return x86_64_sign_extended_value (op);
3162 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3165 x86_64_movabs_operand (op, mode)
3166 rtx op;
3167 enum machine_mode mode;
3169 if (!TARGET_64BIT || !flag_pic)
3170 return nonmemory_operand (op, mode);
3171 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3172 return 1;
3173 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3174 return 1;
3175 return 0;
3178 /* Return nonzero if OPNUM's MEM should be matched
3179 in movabs* patterns. */
3182 ix86_check_movabs (insn, opnum)
3183 rtx insn;
3184 int opnum;
3186 rtx set, mem;
3188 set = PATTERN (insn);
3189 if (GET_CODE (set) == PARALLEL)
3190 set = XVECEXP (set, 0, 0);
3191 if (GET_CODE (set) != SET)
3192 abort ();
3193 mem = XEXP (set, opnum);
3194 while (GET_CODE (mem) == SUBREG)
3195 mem = SUBREG_REG (mem);
3196 if (GET_CODE (mem) != MEM)
3197 abort ();
3198 return (volatile_ok || !MEM_VOLATILE_P (mem));
3201 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign extended or a zero extended constant. */
3204 x86_64_szext_nonmemory_operand (op, mode)
3205 rtx op;
3206 enum machine_mode mode;
3208 if (!TARGET_64BIT)
3209 return nonmemory_operand (op, mode);
3210 if (register_operand (op, mode))
3211 return 1;
3212 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3215 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3218 x86_64_immediate_operand (op, mode)
3219 rtx op;
3220 enum machine_mode mode;
3222 if (!TARGET_64BIT)
3223 return immediate_operand (op, mode);
3224 return x86_64_sign_extended_value (op);
3227 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value. */
3230 x86_64_zext_immediate_operand (op, mode)
3231 rtx op;
3232 enum machine_mode mode ATTRIBUTE_UNUSED;
3234 return x86_64_zero_extended_value (op);
3237 /* Return nonzero if OP is (const_int 1), else return zero. */
3240 const_int_1_operand (op, mode)
3241 rtx op;
3242 enum machine_mode mode ATTRIBUTE_UNUSED;
3244 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3247 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3248 for shift & compare patterns, as shifting by 0 does not change flags),
3249 else return zero. */
3252 const_int_1_31_operand (op, mode)
3253 rtx op;
3254 enum machine_mode mode ATTRIBUTE_UNUSED;
3256 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3259 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3260 reference and a constant. */
3263 symbolic_operand (op, mode)
3264 register rtx op;
3265 enum machine_mode mode ATTRIBUTE_UNUSED;
3267 switch (GET_CODE (op))
3269 case SYMBOL_REF:
3270 case LABEL_REF:
3271 return 1;
3273 case CONST:
3274 op = XEXP (op, 0);
3275 if (GET_CODE (op) == SYMBOL_REF
3276 || GET_CODE (op) == LABEL_REF
3277 || (GET_CODE (op) == UNSPEC
3278 && (XINT (op, 1) == UNSPEC_GOT
3279 || XINT (op, 1) == UNSPEC_GOTOFF
3280 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3281 return 1;
3282 if (GET_CODE (op) != PLUS
3283 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3284 return 0;
3286 op = XEXP (op, 0);
3287 if (GET_CODE (op) == SYMBOL_REF
3288 || GET_CODE (op) == LABEL_REF)
3289 return 1;
3290 /* Only @GOTOFF gets offsets. */
3291 if (GET_CODE (op) != UNSPEC
3292 || XINT (op, 1) != UNSPEC_GOTOFF)
3293 return 0;
3295 op = XVECEXP (op, 0, 0);
3296 if (GET_CODE (op) == SYMBOL_REF
3297 || GET_CODE (op) == LABEL_REF)
3298 return 1;
3299 return 0;
3301 default:
3302 return 0;
3306 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3309 pic_symbolic_operand (op, mode)
3310 register rtx op;
3311 enum machine_mode mode ATTRIBUTE_UNUSED;
3313 if (GET_CODE (op) != CONST)
3314 return 0;
3315 op = XEXP (op, 0);
3316 if (TARGET_64BIT)
3318 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3319 return 1;
3321 else
3323 if (GET_CODE (op) == UNSPEC)
3324 return 1;
3325 if (GET_CODE (op) != PLUS
3326 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3327 return 0;
3328 op = XEXP (op, 0);
3329 if (GET_CODE (op) == UNSPEC)
3330 return 1;
3332 return 0;
3335 /* Return true if OP is a symbolic operand that resolves locally. */
3337 static int
3338 local_symbolic_operand (op, mode)
3339 rtx op;
3340 enum machine_mode mode ATTRIBUTE_UNUSED;
3342 if (GET_CODE (op) == CONST
3343 && GET_CODE (XEXP (op, 0)) == PLUS
3344 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3345 op = XEXP (XEXP (op, 0), 0);
3347 if (GET_CODE (op) == LABEL_REF)
3348 return 1;
3350 if (GET_CODE (op) != SYMBOL_REF)
3351 return 0;
3353 /* These we've been told are local by varasm and encode_section_info
3354 respectively. */
3355 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3356 return 1;
3358 /* There is, however, a not insubstantial body of code in the rest of
3359 the compiler that assumes it can just stick the results of
3360 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3361 /* ??? This is a hack. Should update the body of the compiler to
3362 always create a DECL and invoke targetm.encode_section_info. */
3363 if (strncmp (XSTR (op, 0), internal_label_prefix,
3364 internal_label_prefix_len) == 0)
3365 return 1;
3367 return 0;
3370 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3373 tls_symbolic_operand (op, mode)
3374 register rtx op;
3375 enum machine_mode mode ATTRIBUTE_UNUSED;
3377 const char *symbol_str;
3379 if (GET_CODE (op) != SYMBOL_REF)
3380 return 0;
3381 symbol_str = XSTR (op, 0);
3383 if (symbol_str[0] != '%')
3384 return 0;
3385 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3388 static int
3389 tls_symbolic_operand_1 (op, kind)
3390 rtx op;
3391 enum tls_model kind;
3393 const char *symbol_str;
3395 if (GET_CODE (op) != SYMBOL_REF)
3396 return 0;
3397 symbol_str = XSTR (op, 0);
3399 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3403 global_dynamic_symbolic_operand (op, mode)
3404 register rtx op;
3405 enum machine_mode mode ATTRIBUTE_UNUSED;
3407 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3411 local_dynamic_symbolic_operand (op, mode)
3412 register rtx op;
3413 enum machine_mode mode ATTRIBUTE_UNUSED;
3415 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3419 initial_exec_symbolic_operand (op, mode)
3420 register rtx op;
3421 enum machine_mode mode ATTRIBUTE_UNUSED;
3423 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3427 local_exec_symbolic_operand (op, mode)
3428 register rtx op;
3429 enum machine_mode mode ATTRIBUTE_UNUSED;
3431 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3434 /* Test for a valid operand for a call instruction. Don't allow the
3435 arg pointer register or virtual regs since they may decay into
3436 reg + const, which the patterns can't handle. */
3439 call_insn_operand (op, mode)
3440 rtx op;
3441 enum machine_mode mode ATTRIBUTE_UNUSED;
3443 /* Disallow indirect through a virtual register. This leads to
3444 compiler aborts when trying to eliminate them. */
3445 if (GET_CODE (op) == REG
3446 && (op == arg_pointer_rtx
3447 || op == frame_pointer_rtx
3448 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3449 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3450 return 0;
3452 /* Disallow `call 1234'. Due to varying assembler lameness this
3453 gets either rejected or translated to `call .+1234'. */
3454 if (GET_CODE (op) == CONST_INT)
3455 return 0;
3457 /* Explicitly allow SYMBOL_REF even if pic. */
3458 if (GET_CODE (op) == SYMBOL_REF)
3459 return 1;
3461 /* Otherwise we can allow any general_operand in the address. */
3462 return general_operand (op, Pmode);
3466 constant_call_address_operand (op, mode)
3467 rtx op;
3468 enum machine_mode mode ATTRIBUTE_UNUSED;
3470 if (GET_CODE (op) == CONST
3471 && GET_CODE (XEXP (op, 0)) == PLUS
3472 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3473 op = XEXP (XEXP (op, 0), 0);
3474 return GET_CODE (op) == SYMBOL_REF;
3477 /* Match exactly zero and one. */
3480 const0_operand (op, mode)
3481 register rtx op;
3482 enum machine_mode mode;
3484 return op == CONST0_RTX (mode);
3488 const1_operand (op, mode)
3489 register rtx op;
3490 enum machine_mode mode ATTRIBUTE_UNUSED;
3492 return op == const1_rtx;
3495 /* Match 2, 4, or 8. Used for leal multiplicands. */
3498 const248_operand (op, mode)
3499 register rtx op;
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3502 return (GET_CODE (op) == CONST_INT
3503 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
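/* Annotation (not part of the original source): 2, 4 and 8 are
   exactly the scale factors allowed in an address like
   (%eax,%ebx,4), so a multiplication by one of them can be expressed
   as a single 'lea' instead of an 'imul'.  */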
3506 /* True if this is a constant appropriate for an increment or decrement. */
3509 incdec_operand (op, mode)
3510 register rtx op;
3511 enum machine_mode mode ATTRIBUTE_UNUSED;
3513 /* On Pentium 4, the inc and dec operations cause an extra dependency on
3514 the flags register, since the carry flag is not set. */
3515 if (TARGET_PENTIUM4 && !optimize_size)
3516 return 0;
3517 return op == const1_rtx || op == constm1_rtx;
3520 /* Return nonzero if OP is acceptable as an operand of the DImode shift
3521 expander. */
3524 shiftdi_operand (op, mode)
3525 rtx op;
3526 enum machine_mode mode ATTRIBUTE_UNUSED;
3528 if (TARGET_64BIT)
3529 return nonimmediate_operand (op, mode);
3530 else
3531 return register_operand (op, mode);
3534 /* Return false if this is the stack pointer, or any other fake
3535 register eliminable to the stack pointer. Otherwise, this is
3536 a register operand.
3538 This is used to prevent esp from being used as an index reg,
3539 which would only happen in pathological cases. */
3542 reg_no_sp_operand (op, mode)
3543 register rtx op;
3544 enum machine_mode mode;
3546 rtx t = op;
3547 if (GET_CODE (t) == SUBREG)
3548 t = SUBREG_REG (t);
3549 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3550 return 0;
3552 return register_operand (op, mode);
3556 mmx_reg_operand (op, mode)
3557 register rtx op;
3558 enum machine_mode mode ATTRIBUTE_UNUSED;
3560 return MMX_REG_P (op);
3563 /* Return false if this is any eliminable register. Otherwise
3564 general_operand. */
3567 general_no_elim_operand (op, mode)
3568 register rtx op;
3569 enum machine_mode mode;
3571 rtx t = op;
3572 if (GET_CODE (t) == SUBREG)
3573 t = SUBREG_REG (t);
3574 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3575 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3576 || t == virtual_stack_dynamic_rtx)
3577 return 0;
3578 if (REG_P (t)
3579 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3580 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3581 return 0;
3583 return general_operand (op, mode);
3586 /* Return false if this is any eliminable register. Otherwise
3587 register_operand or const_int. */
3590 nonmemory_no_elim_operand (op, mode)
3591 register rtx op;
3592 enum machine_mode mode;
3594 rtx t = op;
3595 if (GET_CODE (t) == SUBREG)
3596 t = SUBREG_REG (t);
3597 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3598 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3599 || t == virtual_stack_dynamic_rtx)
3600 return 0;
3602 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3605 /* Return false if this is any eliminable register or stack register,
3606 otherwise work like register_operand. */
3609 index_register_operand (op, mode)
3610 register rtx op;
3611 enum machine_mode mode;
3613 rtx t = op;
3614 if (GET_CODE (t) == SUBREG)
3615 t = SUBREG_REG (t);
3616 if (!REG_P (t))
3617 return 0;
3618 if (t == arg_pointer_rtx
3619 || t == frame_pointer_rtx
3620 || t == virtual_incoming_args_rtx
3621 || t == virtual_stack_vars_rtx
3622 || t == virtual_stack_dynamic_rtx
3623 || REGNO (t) == STACK_POINTER_REGNUM)
3624 return 0;
3626 return general_operand (op, mode);
3629 /* Return true if op is a Q_REGS class register. */
3632 q_regs_operand (op, mode)
3633 register rtx op;
3634 enum machine_mode mode;
3636 if (mode != VOIDmode && GET_MODE (op) != mode)
3637 return 0;
3638 if (GET_CODE (op) == SUBREG)
3639 op = SUBREG_REG (op);
3640 return ANY_QI_REG_P (op);
3643 /* Return true if op is a NON_Q_REGS class register. */
3646 non_q_regs_operand (op, mode)
3647 register rtx op;
3648 enum machine_mode mode;
3650 if (mode != VOIDmode && GET_MODE (op) != mode)
3651 return 0;
3652 if (GET_CODE (op) == SUBREG)
3653 op = SUBREG_REG (op);
3654 return NON_QI_REG_P (op);
3657 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3659 vector_move_operand (op, mode)
3660 rtx op;
3661 enum machine_mode mode;
3663 if (nonimmediate_operand (op, mode))
3664 return 1;
3665 if (GET_MODE (op) != mode && mode != VOIDmode)
3666 return 0;
3667 return (op == CONST0_RTX (GET_MODE (op)));
3670 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3671 insns. */
3673 sse_comparison_operator (op, mode)
3674 rtx op;
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3677 enum rtx_code code = GET_CODE (op);
3678 switch (code)
3680 /* Operations supported directly. */
3681 case EQ:
3682 case LT:
3683 case LE:
3684 case UNORDERED:
3685 case NE:
3686 case UNGE:
3687 case UNGT:
3688 case ORDERED:
3689 return 1;
3690 /* These are equivalent to the ones above for non-IEEE comparisons. */
3691 case UNEQ:
3692 case UNLT:
3693 case UNLE:
3694 case LTGT:
3695 case GE:
3696 case GT:
3697 return !TARGET_IEEE_FP;
3698 default:
3699 return 0;
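/* For example, GE has no direct SSE predicate; it would have to be
   emitted as CMPNLTSS, which also yields true for unordered (NaN)
   operands.  That is wrong under IEEE semantics, which is why GE and
   friends are accepted only when not honoring IEEE NaNs.  */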
3702 /* Return 1 if OP is a valid comparison operator in a valid mode. */
3704 ix86_comparison_operator (op, mode)
3705 register rtx op;
3706 enum machine_mode mode;
3708 enum machine_mode inmode;
3709 enum rtx_code code = GET_CODE (op);
3710 if (mode != VOIDmode && GET_MODE (op) != mode)
3711 return 0;
3712 if (GET_RTX_CLASS (code) != '<')
3713 return 0;
3714 inmode = GET_MODE (XEXP (op, 0));
3716 if (inmode == CCFPmode || inmode == CCFPUmode)
3718 enum rtx_code second_code, bypass_code;
3719 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3720 return (bypass_code == NIL && second_code == NIL);
3722 switch (code)
3724 case EQ: case NE:
3725 return 1;
3726 case LT: case GE:
3727 if (inmode == CCmode || inmode == CCGCmode
3728 || inmode == CCGOCmode || inmode == CCNOmode)
3729 return 1;
3730 return 0;
3731 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3732 if (inmode == CCmode)
3733 return 1;
3734 return 0;
3735 case GT: case LE:
3736 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3737 return 1;
3738 return 0;
3739 default:
3740 return 0;
3744 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3747 fcmov_comparison_operator (op, mode)
3748 register rtx op;
3749 enum machine_mode mode;
3751 enum machine_mode inmode;
3752 enum rtx_code code = GET_CODE (op);
3753 if (mode != VOIDmode && GET_MODE (op) != mode)
3754 return 0;
3755 if (GET_RTX_CLASS (code) != '<')
3756 return 0;
3757 inmode = GET_MODE (XEXP (op, 0));
3758 if (inmode == CCFPmode || inmode == CCFPUmode)
3760 enum rtx_code second_code, bypass_code;
3761 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3762 if (bypass_code != NIL || second_code != NIL)
3763 return 0;
3764 code = ix86_fp_compare_code_to_integer (code);
3766 /* The i387 supports only a limited set of condition codes. */
3767 switch (code)
3769 case LTU: case GTU: case LEU: case GEU:
3770 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3771 return 1;
3772 return 0;
3773 case ORDERED: case UNORDERED:
3774 case EQ: case NE:
3775 return 1;
3776 default:
3777 return 0;
3781 /* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
3784 promotable_binary_operator (op, mode)
3785 register rtx op;
3786 enum machine_mode mode ATTRIBUTE_UNUSED;
3788 switch (GET_CODE (op))
3790 case MULT:
3791 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3792 but the 386 and 486 do HImode multiplies faster. */
3793 return ix86_cpu > PROCESSOR_I486;
3794 case PLUS:
3795 case AND:
3796 case IOR:
3797 case XOR:
3798 case ASHIFT:
3799 return 1;
3800 default:
3801 return 0;
3805 /* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3807 into registers. */
3810 cmp_fp_expander_operand (op, mode)
3811 register rtx op;
3812 enum machine_mode mode;
3814 if (mode != VOIDmode && mode != GET_MODE (op))
3815 return 0;
3816 if (GET_CODE (op) == CONST_DOUBLE)
3817 return 1;
3818 return general_operand (op, mode);
3821 /* Match an SI or HImode register for a zero_extract. */
3824 ext_register_operand (op, mode)
3825 register rtx op;
3826 enum machine_mode mode ATTRIBUTE_UNUSED;
3828 int regno;
3829 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3830 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3831 return 0;
3833 if (!register_operand (op, VOIDmode))
3834 return 0;
3836 /* Be careful to accept only registers having upper parts. */
3837 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3838 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
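/* In the i386 register numbering, hard registers 0 through 3 are
   %eax, %edx, %ecx and %ebx, the only registers with addressable
   high-byte parts (%ah, %dh, %ch, %bh).  Pseudo registers (regno >
   LAST_VIRTUAL_REGISTER) are accepted, since they can still be
   allocated to one of those four.  */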
3841 /* Return 1 if this is a valid binary floating-point operation.
3842 OP is the expression matched, and MODE is its mode. */
3845 binary_fp_operator (op, mode)
3846 register rtx op;
3847 enum machine_mode mode;
3849 if (mode != VOIDmode && mode != GET_MODE (op))
3850 return 0;
3852 switch (GET_CODE (op))
3854 case PLUS:
3855 case MINUS:
3856 case MULT:
3857 case DIV:
3858 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3860 default:
3861 return 0;
3866 mult_operator (op, mode)
3867 register rtx op;
3868 enum machine_mode mode ATTRIBUTE_UNUSED;
3870 return GET_CODE (op) == MULT;
3874 div_operator (op, mode)
3875 register rtx op;
3876 enum machine_mode mode ATTRIBUTE_UNUSED;
3878 return GET_CODE (op) == DIV;
3882 arith_or_logical_operator (op, mode)
3883 rtx op;
3884 enum machine_mode mode;
3886 return ((mode == VOIDmode || GET_MODE (op) == mode)
3887 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
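/* In this GCC's rtx class encoding, 'c' marks commutative binary
   operators (e.g. PLUS, AND) and '2' non-commutative binary operators
   (e.g. MINUS, DIV), so together they cover all two-operand
   arithmetic and logical codes.  */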
3891 /* Returns 1 if OP is a memory operand with a displacement. */
3894 memory_displacement_operand (op, mode)
3895 register rtx op;
3896 enum machine_mode mode;
3898 struct ix86_address parts;
3900 if (! memory_operand (op, mode))
3901 return 0;
3903 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3904 abort ();
3906 return parts.disp != NULL_RTX;
3909 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
3916 cmpsi_operand (op, mode)
3917 rtx op;
3918 enum machine_mode mode;
3920 if (nonimmediate_operand (op, mode))
3921 return 1;
3923 if (GET_CODE (op) == AND
3924 && GET_MODE (op) == SImode
3925 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3930 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3931 return 1;
3933 return 0;
3936 /* Returns 1 if OP is a memory operand that cannot be represented by the
3937 modRM array. */
3940 long_memory_operand (op, mode)
3941 register rtx op;
3942 enum machine_mode mode;
3944 if (! memory_operand (op, mode))
3945 return 0;
3947 return memory_address_length (op) != 0;
3950 /* Return nonzero if the rtx is known to be aligned. */
3953 aligned_operand (op, mode)
3954 rtx op;
3955 enum machine_mode mode;
3957 struct ix86_address parts;
3959 if (!general_operand (op, mode))
3960 return 0;
3962 /* Registers and immediate operands are always "aligned". */
3963 if (GET_CODE (op) != MEM)
3964 return 1;
3966 /* Don't even try to do any aligned optimizations with volatiles. */
3967 if (MEM_VOLATILE_P (op))
3968 return 0;
3970 op = XEXP (op, 0);
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op) == PRE_DEC
3974 || GET_CODE (op) == POST_INC)
3975 return 1;
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op, &parts))
3979 abort ();
3981 if (parts.base && GET_CODE (parts.base) == SUBREG)
3982 parts.base = SUBREG_REG (parts.base);
3983 if (parts.index && GET_CODE (parts.index) == SUBREG)
3984 parts.index = SUBREG_REG (parts.index);
3986 /* Look for some component that isn't known to be aligned. */
3987 if (parts.index)
3989 if (parts.scale < 4
3990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3991 return 0;
3993 if (parts.base)
3995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3996 return 0;
3998 if (parts.disp)
4000 if (GET_CODE (parts.disp) != CONST_INT
4001 || (INTVAL (parts.disp) & 3) != 0)
4002 return 0;
4005 /* Didn't find one -- this must be an aligned address. */
4006 return 1;
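/* For example, "8(%esp)" counts as aligned when the stack pointer is
   known to be at least 32-bit aligned (REGNO_POINTER_ALIGN >= 32),
   since the displacement 8 is a multiple of 4.  */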
4009 /* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4011 worthwhile. */
4014 standard_80387_constant_p (x)
4015 rtx x;
4017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4018 return -1;
4019 /* Note that the 80387 has other constants, such as pi, that we should
4020 support too. On some machines, these are much slower to load as a
4021 standard constant than to load from a double in memory. */
4022 if (x == CONST0_RTX (GET_MODE (x)))
4023 return 1;
4024 if (x == CONST1_RTX (GET_MODE (x)))
4025 return 2;
4026 return 0;
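/* The return values correspond to the i387 constant-loading insns:
   1 means the value can be loaded with fldz (+0.0), 2 with fld1
   (+1.0); any other return value means the constant must be loaded
   some other way, normally from memory.  */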
4029 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4032 standard_sse_constant_p (x)
4033 rtx x;
4035 if (x == const0_rtx)
4036 return 1;
4037 return (x == CONST0_RTX (GET_MODE (x)));
4040 /* Returns 1 if OP contains a symbol reference */
4043 symbolic_reference_mentioned_p (op)
4044 rtx op;
4046 register const char *fmt;
4047 register int i;
4049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4050 return 1;
4052 fmt = GET_RTX_FORMAT (GET_CODE (op));
4053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4055 if (fmt[i] == 'E')
4057 register int j;
4059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4061 return 1;
4064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4065 return 1;
4068 return 0;
4071 /* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4081 `return' is OK. */
4084 ix86_can_use_return_insn_p ()
4086 struct ix86_frame frame;
4088 #ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4090 return 0;
4091 #endif
4093 if (! reload_completed || frame_pointer_needed)
4094 return 0;
4096 /* Don't allow popping more than 32K bytes of arguments, since that's all
4097 we can do with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size >= 32768)
4100 return 0;
4102 ix86_compute_frame_layout (&frame);
4103 return frame.to_allocate == 0 && frame.nregs == 0;
4106 /* Return 1 if VALUE can be stored in the sign-extended immediate field. */
4108 x86_64_sign_extended_value (value)
4109 rtx value;
4111 switch (GET_CODE (value))
4113 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4114 to be at least 32, and thus all acceptable constants are
4115 represented as CONST_INTs. */
4116 case CONST_INT:
4117 if (HOST_BITS_PER_WIDE_INT == 32)
4118 return 1;
4119 else
4121 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4122 return trunc_int_for_mode (val, SImode) == val;
4124 break;
4126 /* For certain code models, the symbolic references are known to fit;
4127 in the CM_SMALL_PIC model we know a reference fits if it is local to the
4128 shared library. Don't count TLS SYMBOL_REFs here, since they should fit
4129 only inside of an UNSPEC, handled below. */
4130 case SYMBOL_REF:
4131 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4133 /* For certain code models, the code is near as well. */
4134 case LABEL_REF:
4135 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4136 || ix86_cmodel == CM_KERNEL);
4138 /* We may also accept offsetted memory references in certain special
4139 cases. */
4140 case CONST:
4141 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4142 switch (XINT (XEXP (value, 0), 1))
4144 case UNSPEC_GOTPCREL:
4145 case UNSPEC_DTPOFF:
4146 case UNSPEC_GOTNTPOFF:
4147 case UNSPEC_NTPOFF:
4148 return 1;
4149 default:
4150 break;
4152 if (GET_CODE (XEXP (value, 0)) == PLUS)
4154 rtx op1 = XEXP (XEXP (value, 0), 0);
4155 rtx op2 = XEXP (XEXP (value, 0), 1);
4156 HOST_WIDE_INT offset;
4158 if (ix86_cmodel == CM_LARGE)
4159 return 0;
4160 if (GET_CODE (op2) != CONST_INT)
4161 return 0;
4162 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4163 switch (GET_CODE (op1))
4165 case SYMBOL_REF:
4166 /* For CM_SMALL assume that the latest object is 16MB below the
4167 end of the 31-bit boundary. We may also accept pretty
4168 large negative offsets, knowing that all objects are
4169 in the positive half of the address space. */
4170 if (ix86_cmodel == CM_SMALL
4171 && offset < 16*1024*1024
4172 && trunc_int_for_mode (offset, SImode) == offset)
4173 return 1;
4174 /* For CM_KERNEL we know that all objects reside in the
4175 negative half of the 32-bit address space. We may not
4176 accept negative offsets, since they may fall just past the
4177 boundary, but we may accept pretty large positive ones. */
4178 if (ix86_cmodel == CM_KERNEL
4179 && offset > 0
4180 && trunc_int_for_mode (offset, SImode) == offset)
4181 return 1;
4182 break;
4183 case LABEL_REF:
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
4186 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4187 && offset < 16*1024*1024
4188 && trunc_int_for_mode (offset, SImode) == offset)
4189 return 1;
4190 if (ix86_cmodel == CM_KERNEL
4191 && offset > 0
4192 && trunc_int_for_mode (offset, SImode) == offset)
4193 return 1;
4194 break;
4195 case UNSPEC:
4196 switch (XINT (op1, 1))
4198 case UNSPEC_DTPOFF:
4199 case UNSPEC_NTPOFF:
4200 if (offset > 0
4201 && trunc_int_for_mode (offset, SImode) == offset)
4202 return 1;
4204 break;
4205 default:
4206 return 0;
4209 return 0;
4210 default:
4211 return 0;
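/* For example, (const_int 0x7fffffff) and (const_int -0x80000000) are
   accepted, while (const_int 0x80000000) is not: sign extension would
   turn the latter into 0xffffffff80000000.  */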
4215 /* Return 1 if VALUE can be stored in the zero-extended immediate field. */
4217 x86_64_zero_extended_value (value)
4218 rtx value;
4220 switch (GET_CODE (value))
4222 case CONST_DOUBLE:
4223 if (HOST_BITS_PER_WIDE_INT == 32)
4224 return (GET_MODE (value) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value));
4226 else
4227 return 0;
4228 case CONST_INT:
4229 if (HOST_BITS_PER_WIDE_INT == 32)
4230 return INTVAL (value) >= 0;
4231 else
4232 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4233 break;
4235 /* For certain code models, the symbolic references are known to fit. */
4236 case SYMBOL_REF:
4237 return ix86_cmodel == CM_SMALL;
4239 /* For certain code models, the code is near as well. */
4240 case LABEL_REF:
4241 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4243 /* We may also accept offsetted memory references in certain special
4244 cases. */
4245 case CONST:
4246 if (GET_CODE (XEXP (value, 0)) == PLUS)
4248 rtx op1 = XEXP (XEXP (value, 0), 0);
4249 rtx op2 = XEXP (XEXP (value, 0), 1);
4251 if (ix86_cmodel == CM_LARGE)
4252 return 0;
4253 switch (GET_CODE (op1))
4255 case SYMBOL_REF:
4256 return 0;
4257 /* For the small code model we may accept pretty large positive
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of the NULL-pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel == CM_SMALL
4262 && GET_CODE (op2) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2), SImode)
4265 == INTVAL (op2)))
4266 return 1;
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
4269 negative address space to positive, but perhaps this
4270 is not worthwhile. */
4271 break;
4272 case LABEL_REF:
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4276 && GET_CODE (op2) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2), SImode)
4279 == INTVAL (op2)))
4280 return 1;
4281 break;
4282 default:
4283 return 0;
4286 return 0;
4287 default:
4288 return 0;
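/* For example, (const_int 0xffffffff) is accepted here, since a movl
   zero extends into the upper 32 bits, whereas (const_int -1) is not.  */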
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4297 ix86_frame_pointer_required ()
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4302 return 1;
4304 /* Several x86 OSes need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4307 return 1;
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we're not
4311 compiling a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf))
4314 return 1;
4316 if (current_function_profile)
4317 return 1;
4319 return 0;
4322 /* Record that the current function accesses previous call frames. */
4324 void
4325 ix86_setup_frame_addresses ()
4327 cfun->machine->accesses_prev_frame = 1;
4330 #if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4332 #else
4333 # define USE_HIDDEN_LINKONCE 0
4334 #endif
4336 static int pic_labels_used;
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4341 static void
4342 get_pc_thunk_name (name, regno)
4343 char name[32];
4344 unsigned int regno;
4346 if (USE_HIDDEN_LINKONCE)
4347 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4348 else
4349 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4353 /* This function generates, for each PIC register used, a -fpic thunk that
4354 loads the register with the return address of the caller and then returns. */
4356 void
4357 ix86_asm_file_end (file)
4358 FILE *file;
4360 rtx xops[2];
4361 int regno;
4363 for (regno = 0; regno < 8; ++regno)
4365 char name[32];
4367 if (! ((pic_labels_used >> regno) & 1))
4368 continue;
4370 get_pc_thunk_name (name, regno);
4372 if (USE_HIDDEN_LINKONCE)
4374 tree decl;
4376 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4377 error_mark_node);
4378 TREE_PUBLIC (decl) = 1;
4379 TREE_STATIC (decl) = 1;
4380 DECL_ONE_ONLY (decl) = 1;
4382 (*targetm.asm_out.unique_section) (decl, 0);
4383 named_section (decl, NULL, 0);
4385 (*targetm.asm_out.globalize_label) (file, name);
4386 fputs ("\t.hidden\t", file);
4387 assemble_name (file, name);
4388 fputc ('\n', file);
4389 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4391 else
4393 text_section ();
4394 ASM_OUTPUT_LABEL (file, name);
4397 xops[0] = gen_rtx_REG (SImode, regno);
4398 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4400 output_asm_insn ("ret", xops);
4404 /* Emit code for the SET_GOT patterns. */
4406 const char *
4407 output_set_got (dest)
4408 rtx dest;
4410 rtx xops[3];
4412 xops[0] = dest;
4413 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4415 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4417 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4419 if (!flag_pic)
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4421 else
4422 output_asm_insn ("call\t%a2", xops);
4424 #if TARGET_MACHO
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4428 #endif
4429 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4432 if (flag_pic)
4433 output_asm_insn ("pop{l}\t%0", xops);
4435 else
4437 char name[32];
4438 get_pc_thunk_name (name, REGNO (dest));
4439 pic_labels_used |= 1 << REGNO (dest);
4441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4443 output_asm_insn ("call\t%X2", xops);
4446 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4448 else if (!TARGET_MACHO)
4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4451 return "";
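/* Without deep branch prediction, the classic PIC sequence emitted
   here is roughly:
        call    .L2
   .L2: popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
   With deep branch prediction, the call/pop pair is replaced by a call
   to the __i686.get_pc_thunk.bx helper emitted by ix86_asm_file_end.  */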
4454 /* Generate a "push" pattern for input ARG. */
4456 static rtx
4457 gen_push (arg)
4458 rtx arg;
4460 return gen_rtx_SET (VOIDmode,
4461 gen_rtx_MEM (Pmode,
4462 gen_rtx_PRE_DEC (Pmode,
4463 stack_pointer_rtx)),
4464 arg);
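/* For %ebp, for instance, the returned pattern is
     (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI ebp))
   which the backend matches as "pushl %ebp".  */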
4467 /* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4470 static unsigned int
4471 ix86_select_alt_pic_regnum ()
4473 if (current_function_is_leaf && !current_function_profile)
4475 int i;
4476 for (i = 2; i >= 0; --i)
4477 if (!regs_ever_live[i])
4478 return i;
4481 return INVALID_REGNUM;
4484 /* Return 1 if we need to save REGNO. */
4485 static int
4486 ix86_save_reg (regno, maybe_eh_return)
4487 unsigned int regno;
4488 int maybe_eh_return;
4490 if (pic_offset_table_rtx
4491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4493 || current_function_profile
4494 || current_function_calls_eh_return
4495 || current_function_uses_const_pool))
4497 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4498 return 0;
4499 return 1;
4502 if (current_function_calls_eh_return && maybe_eh_return)
4504 unsigned i;
4505 for (i = 0; ; i++)
4507 unsigned test = EH_RETURN_DATA_REGNO (i);
4508 if (test == INVALID_REGNUM)
4509 break;
4510 if (test == regno)
4511 return 1;
4515 return (regs_ever_live[regno]
4516 && !call_used_regs[regno]
4517 && !fixed_regs[regno]
4518 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4521 /* Return number of registers to be saved on the stack. */
4523 static int
4524 ix86_nsaved_regs ()
4526 int nregs = 0;
4527 int regno;
4529 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4530 if (ix86_save_reg (regno, true))
4531 nregs++;
4532 return nregs;
4535 /* Return the offset between two registers, one to be eliminated, and the other
4536 its replacement, at the start of a routine. */
4538 HOST_WIDE_INT
4539 ix86_initial_elimination_offset (from, to)
4540 int from;
4541 int to;
4543 struct ix86_frame frame;
4544 ix86_compute_frame_layout (&frame);
4546 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4547 return frame.hard_frame_pointer_offset;
4548 else if (from == FRAME_POINTER_REGNUM
4549 && to == HARD_FRAME_POINTER_REGNUM)
4550 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4551 else
4553 if (to != STACK_POINTER_REGNUM)
4554 abort ();
4555 else if (from == ARG_POINTER_REGNUM)
4556 return frame.stack_pointer_offset;
4557 else if (from != FRAME_POINTER_REGNUM)
4558 abort ();
4559 else
4560 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4564 /* Fill the ix86_frame structure with the frame layout of the function being compiled. */
4566 static void
4567 ix86_compute_frame_layout (frame)
4568 struct ix86_frame *frame;
4570 HOST_WIDE_INT total_size;
4571 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4572 int offset;
4573 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4574 HOST_WIDE_INT size = get_frame_size ();
4576 frame->nregs = ix86_nsaved_regs ();
4577 total_size = size;
4579 /* Skip return address and saved base pointer. */
4580 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4582 frame->hard_frame_pointer_offset = offset;
4584 /* Do some sanity checking of stack_alignment_needed and
4585 preferred_alignment, since the i386 port is the only one using these
4586 features, and they may break easily. */
4588 if (size && !stack_alignment_needed)
4589 abort ();
4590 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4591 abort ();
4592 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4593 abort ();
4594 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4595 abort ();
4597 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4598 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4600 /* Register save area */
4601 offset += frame->nregs * UNITS_PER_WORD;
4603 /* Va-arg area */
4604 if (ix86_save_varrargs_registers)
4606 offset += X86_64_VARARGS_SIZE;
4607 frame->va_arg_size = X86_64_VARARGS_SIZE;
4609 else
4610 frame->va_arg_size = 0;
4612 /* Align start of frame for local function. */
4613 frame->padding1 = ((offset + stack_alignment_needed - 1)
4614 & -stack_alignment_needed) - offset;
4616 offset += frame->padding1;
4618 /* Frame pointer points here. */
4619 frame->frame_pointer_offset = offset;
4621 offset += size;
4623 /* Add outgoing arguments area. Can be skipped if we eliminated
4624 all the function calls as dead code. */
4625 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4627 offset += current_function_outgoing_args_size;
4628 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4630 else
4631 frame->outgoing_arguments_size = 0;
4633 /* Align stack boundary. Only needed if we're calling another function
4634 or using alloca. */
4635 if (!current_function_is_leaf || current_function_calls_alloca)
4636 frame->padding2 = ((offset + preferred_alignment - 1)
4637 & -preferred_alignment) - offset;
4638 else
4639 frame->padding2 = 0;
4641 offset += frame->padding2;
4643 /* We've reached end of stack frame. */
4644 frame->stack_pointer_offset = offset;
4646 /* Size prologue needs to allocate. */
4647 frame->to_allocate =
4648 (size + frame->padding1 + frame->padding2
4649 + frame->outgoing_arguments_size + frame->va_arg_size);
4651 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4652 && current_function_is_leaf)
4654 frame->red_zone_size = frame->to_allocate;
4655 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4656 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4658 else
4659 frame->red_zone_size = 0;
4660 frame->to_allocate -= frame->red_zone_size;
4661 frame->stack_pointer_offset -= frame->red_zone_size;
4662 #if 0
4663 fprintf (stderr, "nregs: %i\n", frame->nregs);
4664 fprintf (stderr, "size: %i\n", size);
4665 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4666 fprintf (stderr, "padding1: %i\n", frame->padding1);
4667 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4668 fprintf (stderr, "padding2: %i\n", frame->padding2);
4669 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4670 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4671 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4672 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4673 frame->hard_frame_pointer_offset);
4674 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4675 #endif
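/* The resulting layout, going down from the incoming stack pointer
   and before the x86-64 red-zone adjustment, is roughly:
       return address
       saved frame pointer (if frame_pointer_needed)
       register save area (frame->nregs words)
       va-arg registers save area
       padding1 (to stack_alignment_needed)   <- frame_pointer_offset
       local variables (size)
       outgoing arguments
       padding2 (to preferred_alignment)      <- stack_pointer_offset  */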
4678 /* Emit code to save registers in the prologue. */
4680 static void
4681 ix86_emit_save_regs ()
4683 register int regno;
4684 rtx insn;
4686 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4687 if (ix86_save_reg (regno, true))
4689 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4690 RTX_FRAME_RELATED_P (insn) = 1;
4694 /* Emit code to save registers using MOV insns. The first register
4695 is saved at POINTER + OFFSET. */
4696 static void
4697 ix86_emit_save_regs_using_mov (pointer, offset)
4698 rtx pointer;
4699 HOST_WIDE_INT offset;
4701 int regno;
4702 rtx insn;
4704 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4705 if (ix86_save_reg (regno, true))
4707 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4708 Pmode, offset),
4709 gen_rtx_REG (Pmode, regno));
4710 RTX_FRAME_RELATED_P (insn) = 1;
4711 offset += UNITS_PER_WORD;
4715 /* Expand the prologue into a bunch of separate insns. */
4717 void
4718 ix86_expand_prologue ()
4720 rtx insn;
4721 bool pic_reg_used;
4722 struct ix86_frame frame;
4723 int use_mov = 0;
4724 HOST_WIDE_INT allocate;
4726 if (!optimize_size)
4728 use_fast_prologue_epilogue
4729 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4730 if (TARGET_PROLOGUE_USING_MOVE)
4731 use_mov = use_fast_prologue_epilogue;
4733 ix86_compute_frame_layout (&frame);
4735 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4736 slower on all targets. Also sdb doesn't like it. */
4738 if (frame_pointer_needed)
4740 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4741 RTX_FRAME_RELATED_P (insn) = 1;
4743 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4744 RTX_FRAME_RELATED_P (insn) = 1;
4747 allocate = frame.to_allocate;
4748 /* When we are dealing with only a single register and an empty frame,
4749 a push is equivalent to the mov+add sequence. */
4750 if (allocate == 0 && frame.nregs <= 1)
4751 use_mov = 0;
4753 if (!use_mov)
4754 ix86_emit_save_regs ();
4755 else
4756 allocate += frame.nregs * UNITS_PER_WORD;
4758 if (allocate == 0)
4760 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4762 insn = emit_insn (gen_pro_epilogue_adjust_stack
4763 (stack_pointer_rtx, stack_pointer_rtx,
4764 GEN_INT (-allocate)));
4765 RTX_FRAME_RELATED_P (insn) = 1;
4767 else
4769 /* Only valid for Win32. */
4770 rtx eax = gen_rtx_REG (SImode, 0);
4771 bool eax_live = ix86_eax_live_at_start_p ();
4773 if (TARGET_64BIT)
4774 abort ();
4776 if (eax_live)
4778 emit_insn (gen_push (eax));
4779 allocate -= 4;
4782 insn = emit_move_insn (eax, GEN_INT (allocate));
4783 RTX_FRAME_RELATED_P (insn) = 1;
4785 insn = emit_insn (gen_allocate_stack_worker (eax));
4786 RTX_FRAME_RELATED_P (insn) = 1;
4788 if (eax_live)
4790 rtx t = plus_constant (stack_pointer_rtx, allocate);
4791 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4795 if (use_mov)
4797 if (!frame_pointer_needed || !frame.to_allocate)
4798 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4799 else
4800 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4801 -frame.nregs * UNITS_PER_WORD);
4804 #ifdef SUBTARGET_PROLOGUE
4805 SUBTARGET_PROLOGUE;
4806 #endif
4808 pic_reg_used = false;
4809 if (pic_offset_table_rtx
4810 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4811 || current_function_profile))
4813 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4815 if (alt_pic_reg_used != INVALID_REGNUM)
4816 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4818 pic_reg_used = true;
4821 if (pic_reg_used)
4823 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4825 /* Even with accurate pre-reload life analysis, we can wind up
4826 deleting all references to the pic register after reload.
4827 Consider the case where cross-jumping unifies two sides of a branch
4828 controlled by a comparison against the only read from a global.
4829 In that case, allow the set_got to be deleted, though we're
4830 too late to do anything about the ebx save in the prologue. */
4831 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4834 /* Prevent function calls from being scheduled before the call to mcount.
4835 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4836 if (current_function_profile)
4837 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4840 /* Emit code to restore saved registers using MOV insns. First register
4841 is restored from POINTER + OFFSET. */
4842 static void
4843 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4844 rtx pointer;
4845 int offset;
4846 int maybe_eh_return;
4848 int regno;
4850 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4851 if (ix86_save_reg (regno, maybe_eh_return))
4853 emit_move_insn (gen_rtx_REG (Pmode, regno),
4854 adjust_address (gen_rtx_MEM (Pmode, pointer),
4855 Pmode, offset));
4856 offset += UNITS_PER_WORD;
4860 /* Restore function stack, frame, and registers. */
4862 void
4863 ix86_expand_epilogue (style)
4864 int style;
4866 int regno;
4867 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4868 struct ix86_frame frame;
4869 HOST_WIDE_INT offset;
4871 ix86_compute_frame_layout (&frame);
4873 /* Calculate start of saved registers relative to ebp. Special care
4874 must be taken for the normal return case of a function using
4875 eh_return: the eax and edx registers are marked as saved, but not
4876 restored along this path. */
4877 offset = frame.nregs;
4878 if (current_function_calls_eh_return && style != 2)
4879 offset -= 2;
4880 offset *= -UNITS_PER_WORD;
4882 /* If we're only restoring one register and sp is not valid, then
4883 use a move instruction to restore the register, since it's
4884 less work than reloading sp and popping the register.
4886 The default code results in a stack adjustment using an add/lea instruction,
4887 while this code results in a LEAVE instruction (or discrete equivalent),
4888 so it is profitable in some other cases as well, especially when there
4889 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4890 and there is exactly one register to pop. This heuristic may need some
4891 tuning in the future. */
4892 if ((!sp_valid && frame.nregs <= 1)
4893 || (TARGET_EPILOGUE_USING_MOVE
4894 && use_fast_prologue_epilogue
4895 && (frame.nregs > 1 || frame.to_allocate))
4896 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4897 || (frame_pointer_needed && TARGET_USE_LEAVE
4898 && use_fast_prologue_epilogue && frame.nregs == 1)
4899 || current_function_calls_eh_return)
4901 /* Restore registers. We can use ebp or esp to address the memory
4902 locations. If both are available, default to ebp, since offsets
4903 are known to be small. The only exception is esp pointing directly to
4904 the end of the block of saved registers, where we may simplify the
4905 addressing mode. */
4907 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4908 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4909 frame.to_allocate, style == 2);
4910 else
4911 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4912 offset, style == 2);
4914 /* eh_return epilogues need %ecx added to the stack pointer. */
4915 if (style == 2)
4917 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4919 if (frame_pointer_needed)
4921 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4922 tmp = plus_constant (tmp, UNITS_PER_WORD);
4923 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4925 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4926 emit_move_insn (hard_frame_pointer_rtx, tmp);
4928 emit_insn (gen_pro_epilogue_adjust_stack
4929 (stack_pointer_rtx, sa, const0_rtx));
4931 else
4933 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4934 tmp = plus_constant (tmp, (frame.to_allocate
4935 + frame.nregs * UNITS_PER_WORD));
4936 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4939 else if (!frame_pointer_needed)
4940 emit_insn (gen_pro_epilogue_adjust_stack
4941 (stack_pointer_rtx, stack_pointer_rtx,
4942 GEN_INT (frame.to_allocate
4943 + frame.nregs * UNITS_PER_WORD)));
4944 /* If not an i386, mov & pop is faster than "leave". */
4945 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4946 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4947 else
4949 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4950 hard_frame_pointer_rtx,
4951 const0_rtx));
4952 if (TARGET_64BIT)
4953 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4954 else
4955 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4958 else
4960 /* First step is to deallocate the stack frame so that we can
4961 pop the registers. */
4962 if (!sp_valid)
4964 if (!frame_pointer_needed)
4965 abort ();
4966 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4967 hard_frame_pointer_rtx,
4968 GEN_INT (offset)));
4970 else if (frame.to_allocate)
4971 emit_insn (gen_pro_epilogue_adjust_stack
4972 (stack_pointer_rtx, stack_pointer_rtx,
4973 GEN_INT (frame.to_allocate)));
4975 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4976 if (ix86_save_reg (regno, false))
4978 if (TARGET_64BIT)
4979 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4980 else
4981 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4983 if (frame_pointer_needed)
4985 /* LEAVE results in shorter dependency chains on CPUs that are
4986 able to grok it fast. */
4987 if (TARGET_USE_LEAVE)
4988 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4989 else if (TARGET_64BIT)
4990 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4991 else
4992 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4996 /* Sibcall epilogues don't want a return instruction. */
4997 if (style == 0)
4998 return;
5000 if (current_function_pops_args && current_function_args_size)
5002 rtx popc = GEN_INT (current_function_pops_args);
5004 /* The i386 can only pop 64K bytes with one instruction. If asked to pop
5005 more, pop the return address, do an explicit add, and jump indirectly
5006 to the caller. */
5008 if (current_function_pops_args >= 65536)
5010 rtx ecx = gen_rtx_REG (SImode, 2);
5012 /* There is no "pascal" calling convention in the 64-bit ABI. */
5013 if (TARGET_64BIT)
5014 abort ();
5016 emit_insn (gen_popsi1 (ecx));
5017 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5018 emit_jump_insn (gen_return_indirect_internal (ecx));
5020 else
5021 emit_jump_insn (gen_return_pop_internal (popc));
5023 else
5024 emit_jump_insn (gen_return_internal ());
5027 /* Reset from the function's potential modifications. */
5029 static void
5030 ix86_output_function_epilogue (file, size)
5031 FILE *file ATTRIBUTE_UNUSED;
5032 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5034 if (pic_offset_table_rtx)
5035 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5038 /* Extract the parts of an RTL expression that is a valid memory address
5039 for an instruction. Return 0 if the structure of the address is
5040 grossly off. Return -1 if the address contains an ASHIFT, so it is not
5041 strictly valid, but is still used for computing the length of an lea instruction. */
5044 static int
5045 ix86_decompose_address (addr, out)
5046 register rtx addr;
5047 struct ix86_address *out;
5049 rtx base = NULL_RTX;
5050 rtx index = NULL_RTX;
5051 rtx disp = NULL_RTX;
5052 HOST_WIDE_INT scale = 1;
5053 rtx scale_rtx = NULL_RTX;
5054 int retval = 1;
5056 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5057 base = addr;
5058 else if (GET_CODE (addr) == PLUS)
5060 rtx op0 = XEXP (addr, 0);
5061 rtx op1 = XEXP (addr, 1);
5062 enum rtx_code code0 = GET_CODE (op0);
5063 enum rtx_code code1 = GET_CODE (op1);
5065 if (code0 == REG || code0 == SUBREG)
5067 if (code1 == REG || code1 == SUBREG)
5068 index = op0, base = op1; /* index + base */
5069 else
5070 base = op0, disp = op1; /* base + displacement */
5072 else if (code0 == MULT)
5074 index = XEXP (op0, 0);
5075 scale_rtx = XEXP (op0, 1);
5076 if (code1 == REG || code1 == SUBREG)
5077 base = op1; /* index*scale + base */
5078 else
5079 disp = op1; /* index*scale + disp */
5081 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5083 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5084 scale_rtx = XEXP (XEXP (op0, 0), 1);
5085 base = XEXP (op0, 1);
5086 disp = op1;
5088 else if (code0 == PLUS)
5090 index = XEXP (op0, 0); /* index + base + disp */
5091 base = XEXP (op0, 1);
5092 disp = op1;
5094 else
5095 return 0;
5097 else if (GET_CODE (addr) == MULT)
5099 index = XEXP (addr, 0); /* index*scale */
5100 scale_rtx = XEXP (addr, 1);
5102 else if (GET_CODE (addr) == ASHIFT)
5104 rtx tmp;
5106 /* We're called for lea too, which implements ashift on occasion. */
5107 index = XEXP (addr, 0);
5108 tmp = XEXP (addr, 1);
5109 if (GET_CODE (tmp) != CONST_INT)
5110 return 0;
5111 scale = INTVAL (tmp);
5112 if ((unsigned HOST_WIDE_INT) scale > 3)
5113 return 0;
5114 scale = 1 << scale;
5115 retval = -1;
5117 else
5118 disp = addr; /* displacement */
5120 /* Extract the integral value of scale. */
5121 if (scale_rtx)
5123 if (GET_CODE (scale_rtx) != CONST_INT)
5124 return 0;
5125 scale = INTVAL (scale_rtx);
5128 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
5129 if (base && index && scale == 1
5130 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5131 || index == stack_pointer_rtx))
5133 rtx tmp = base;
5134 base = index;
5135 index = tmp;
5138 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5139 if ((base == hard_frame_pointer_rtx
5140 || base == frame_pointer_rtx
5141 || base == arg_pointer_rtx) && !disp)
5142 disp = const0_rtx;
5144 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5145 Avoid this by transforming it to [%esi+0]. */
5146 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5147 && base && !index && !disp
5148 && REG_P (base)
5149 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5150 disp = const0_rtx;
5152 /* Special case: encode reg+reg instead of reg*2. */
5153 if (!base && index && scale && scale == 2)
5154 base = index, scale = 1;
5156 /* Special case: scaling cannot be encoded without base or displacement. */
5157 if (!base && !disp && index && scale != 1)
5158 disp = const0_rtx;
5160 out->base = base;
5161 out->index = index;
5162 out->disp = disp;
5163 out->scale = scale;
5165 return retval;
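/* For example, the address "12(%ebx,%eax,4)", i.e.
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12.  */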
5168 /* Return the cost of the memory address X.
5169 For the i386, it is better to use a complex address than to let gcc copy
5170 the address into a register and make a new pseudo. But not if the address
5171 requires two regs; that would mean more pseudos with longer
5172 lifetimes. */
5174 ix86_address_cost (x)
5175 rtx x;
5177 struct ix86_address parts;
5178 int cost = 1;
5180 if (!ix86_decompose_address (x, &parts))
5181 abort ();
5183 if (parts.base && GET_CODE (parts.base) == SUBREG)
5184 parts.base = SUBREG_REG (parts.base);
5185 if (parts.index && GET_CODE (parts.index) == SUBREG)
5186 parts.index = SUBREG_REG (parts.index);
5188 /* More complex memory references are better. */
5189 if (parts.disp && parts.disp != const0_rtx)
5190 cost--;
5192 /* Attempt to minimize number of registers in the address. */
5193 if ((parts.base
5194 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5195 || (parts.index
5196 && (!REG_P (parts.index)
5197 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5198 cost++;
5200 if (parts.base
5201 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5202 && parts.index
5203 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5204 && parts.base != parts.index)
5205 cost++;
5207 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5208 since its predecode logic can't detect the length of such instructions
5209 and decoding degenerates to vector decoding. Increase the cost of such
5210 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5211 to split such addresses or even to refuse them entirely.
5213 The following addressing modes are affected:
5214 [base+scale*index]
5215 [scale*index+disp]
5216 [base+index]
5218 The first and last cases may be avoidable by explicitly coding the zero into
5219 the memory address, but I don't have an AMD K6 machine handy to check this
5220 theory. */
5222 if (TARGET_K6
5223 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5224 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5225 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5226 cost += 10;
5228 return cost;
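/* For example, "4(%ebx)" starts at cost 1 and gets the bonus for its
   nonzero displacement, ending at 0, while a plain (reg pseudo) address
   is penalized for possibly not getting a hard register and costs 2.  */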
5231 /* If X is a machine specific address (i.e. a symbol or label being
5232 referenced as a displacement from the GOT implemented using an
5233 UNSPEC), then return the base term. Otherwise return X. */
5236 ix86_find_base_term (x)
5237 rtx x;
5239 rtx term;
5241 if (TARGET_64BIT)
5243 if (GET_CODE (x) != CONST)
5244 return x;
5245 term = XEXP (x, 0);
5246 if (GET_CODE (term) == PLUS
5247 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5248 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5249 term = XEXP (term, 0);
5250 if (GET_CODE (term) != UNSPEC
5251 || XINT (term, 1) != UNSPEC_GOTPCREL)
5252 return x;
5254 term = XVECEXP (term, 0, 0);
5256 if (GET_CODE (term) != SYMBOL_REF
5257 && GET_CODE (term) != LABEL_REF)
5258 return x;
5260 return term;
5263 if (GET_CODE (x) != PLUS
5264 || XEXP (x, 0) != pic_offset_table_rtx
5265 || GET_CODE (XEXP (x, 1)) != CONST)
5266 return x;
5268 term = XEXP (XEXP (x, 1), 0);
5270 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5271 term = XEXP (term, 0);
5273 if (GET_CODE (term) != UNSPEC
5274 || XINT (term, 1) != UNSPEC_GOTOFF)
5275 return x;
5277 term = XVECEXP (term, 0, 0);
5279 if (GET_CODE (term) != SYMBOL_REF
5280 && GET_CODE (term) != LABEL_REF)
5281 return x;
5283 return term;
5286 /* Determine if a given RTX is a valid constant. We already know this
5287 satisfies CONSTANT_P. */
5289 bool
5290 legitimate_constant_p (x)
5291 rtx x;
5293 rtx inner;
5295 switch (GET_CODE (x))
5297 case SYMBOL_REF:
5298 /* TLS symbols are not constant. */
5299 if (tls_symbolic_operand (x, Pmode))
5300 return false;
5301 break;
5303 case CONST:
5304 inner = XEXP (x, 0);
5306 /* Offsets of TLS symbols are never valid.
5307 Discourage CSE from creating them. */
5308 if (GET_CODE (inner) == PLUS
5309 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5310 return false;
5312 /* Only some unspecs are valid as "constants". */
5313 if (GET_CODE (inner) == UNSPEC)
5314 switch (XINT (inner, 1))
5316 case UNSPEC_TPOFF:
5317 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5318 default:
5319 return false;
5321 break;
5323 default:
5324 break;
5327 /* Otherwise we handle everything else in the move patterns. */
5328 return true;
5331 /* Determine if it's legal to put X into the constant pool. This
5332 is not possible for the address of thread-local symbols, which
5333 is checked above. */
5335 static bool
5336 ix86_cannot_force_const_mem (x)
5337 rtx x;
5339 return !legitimate_constant_p (x);
5342 /* Determine if a given RTX is a valid constant address. */
5344 bool
5345 constant_address_p (x)
5346 rtx x;
5348 switch (GET_CODE (x))
5350 case LABEL_REF:
5351 case CONST_INT:
5352 return true;
5354 case CONST_DOUBLE:
5355 return TARGET_64BIT;
5357 case CONST:
5358 /* For Mach-O, really believe the CONST. */
5359 if (TARGET_MACHO)
5360 return true;
5361 /* Otherwise fall through. */
5362 case SYMBOL_REF:
5363 return !flag_pic && legitimate_constant_p (x);
5365 default:
5366 return false;
5370 /* Nonzero if the constant value X is a legitimate general operand
5371 when generating PIC code. It is given that flag_pic is on and
5372 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5374 bool
5375 legitimate_pic_operand_p (x)
5376 rtx x;
5378 rtx inner;
5380 switch (GET_CODE (x))
5382 case CONST:
5383 inner = XEXP (x, 0);
5385 /* Only some unspecs are valid as "constants". */
5386 if (GET_CODE (inner) == UNSPEC)
5387 switch (XINT (inner, 1))
5389 case UNSPEC_TPOFF:
5390 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5391 default:
5392 return false;
5394 /* FALLTHRU */
5396 case SYMBOL_REF:
5397 case LABEL_REF:
5398 return legitimate_pic_address_disp_p (x);
5400 default:
5401 return true;
5405 /* Determine if a given CONST RTX is a valid memory displacement
5406 in PIC mode. */
5409 legitimate_pic_address_disp_p (disp)
5410 register rtx disp;
5412 bool saw_plus;
5414 /* In 64-bit mode we can allow direct addresses of symbols and labels
5415 when they are not dynamic symbols. */
5416 if (TARGET_64BIT)
5418 /* TLS references should always be enclosed in UNSPEC. */
5419 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5420 return 0;
5421 if (GET_CODE (disp) == SYMBOL_REF
5422 && ix86_cmodel == CM_SMALL_PIC
5423 && (CONSTANT_POOL_ADDRESS_P (disp)
5424 || SYMBOL_REF_FLAG (disp)))
5425 return 1;
5426 if (GET_CODE (disp) == LABEL_REF)
5427 return 1;
5428 if (GET_CODE (disp) == CONST
5429 && GET_CODE (XEXP (disp, 0)) == PLUS
5430 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5431 && ix86_cmodel == CM_SMALL_PIC
5432 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5433 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5434 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5435 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5436 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5437 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5438 return 1;
5440 if (GET_CODE (disp) != CONST)
5441 return 0;
5442 disp = XEXP (disp, 0);
5444 if (TARGET_64BIT)
5446 /* It is unsafe to allow PLUS expressions; they would break the limit on the
5447 allowed distance of GOT references. We should not need these anyway. */
5448 if (GET_CODE (disp) != UNSPEC
5449 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5450 return 0;
5452 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5453 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5454 return 0;
5455 return 1;
5458 saw_plus = false;
5459 if (GET_CODE (disp) == PLUS)
5461 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5462 return 0;
5463 disp = XEXP (disp, 0);
5464 saw_plus = true;
5467 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5468 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5470 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5471 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5472 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5474 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5475 if (strstr (sym_name, "$pb") != 0)
5476 return 1;
5480 if (GET_CODE (disp) != UNSPEC)
5481 return 0;
5483 switch (XINT (disp, 1))
5485 case UNSPEC_GOT:
5486 if (saw_plus)
5487 return false;
5488 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5489 case UNSPEC_GOTOFF:
5490 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5491 case UNSPEC_GOTTPOFF:
5492 case UNSPEC_GOTNTPOFF:
5493 case UNSPEC_INDNTPOFF:
5494 if (saw_plus)
5495 return false;
5496 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5497 case UNSPEC_NTPOFF:
5498 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5499 case UNSPEC_DTPOFF:
5500 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5503 return 0;
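/* A typical displacement accepted here is the @GOTOFF form built by
   legitimize_pic_address below, e.g.
     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   for a symbol "x" local to this module.  */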
5506 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5507 memory address for an instruction. The MODE argument is the machine mode
5508 for the MEM expression that wants to use this address.
5510 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5511 convert common non-canonical forms to canonical form so that they will
5512 be recognized. */
5515 legitimate_address_p (mode, addr, strict)
5516 enum machine_mode mode;
5517 register rtx addr;
5518 int strict;
5520 struct ix86_address parts;
5521 rtx base, index, disp;
5522 HOST_WIDE_INT scale;
5523 const char *reason = NULL;
5524 rtx reason_rtx = NULL_RTX;
5526 if (TARGET_DEBUG_ADDR)
5528 fprintf (stderr,
5529 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5530 GET_MODE_NAME (mode), strict);
5531 debug_rtx (addr);
5534 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5536 if (TARGET_DEBUG_ADDR)
5537 fprintf (stderr, "Success.\n");
5538 return TRUE;
5541 if (ix86_decompose_address (addr, &parts) <= 0)
5543 reason = "decomposition failed";
5544 goto report_error;
5547 base = parts.base;
5548 index = parts.index;
5549 disp = parts.disp;
5550 scale = parts.scale;
5552 /* Validate base register.
5554 Don't allow SUBREGs here; they can lead to spill failures when the base
5555 is one word out of a two-word structure, which is represented internally
5556 as a DImode int. */
5558 if (base)
5560 rtx reg;
5561 reason_rtx = base;
5563 if (GET_CODE (base) == SUBREG)
5564 reg = SUBREG_REG (base);
5565 else
5566 reg = base;
5568 if (GET_CODE (reg) != REG)
5570 reason = "base is not a register";
5571 goto report_error;
5574 if (GET_MODE (base) != Pmode)
5576 reason = "base is not in Pmode";
5577 goto report_error;
5580 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5581 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5583 reason = "base is not valid";
5584 goto report_error;
5588 /* Validate index register.
5590 Don't allow SUBREGs here; they can lead to spill failures when the index
5591 is one word out of a two-word structure, which is represented internally
5592 as a DImode int. */
5594 if (index)
5596 rtx reg;
5597 reason_rtx = index;
5599 if (GET_CODE (index) == SUBREG)
5600 reg = SUBREG_REG (index);
5601 else
5602 reg = index;
5604 if (GET_CODE (reg) != REG)
5606 reason = "index is not a register";
5607 goto report_error;
5610 if (GET_MODE (index) != Pmode)
5612 reason = "index is not in Pmode";
5613 goto report_error;
5616 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5617 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5619 reason = "index is not valid";
5620 goto report_error;
5624 /* Validate scale factor. */
5625 if (scale != 1)
5627 reason_rtx = GEN_INT (scale);
5628 if (!index)
5630 reason = "scale without index";
5631 goto report_error;
5634 if (scale != 2 && scale != 4 && scale != 8)
5636 reason = "scale is not a valid multiplier";
5637 goto report_error;
5641 /* Validate displacement. */
5642 if (disp)
5644 reason_rtx = disp;
5646 if (GET_CODE (disp) == CONST
5647 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5648 switch (XINT (XEXP (disp, 0), 1))
5650 case UNSPEC_GOT:
5651 case UNSPEC_GOTOFF:
5652 case UNSPEC_GOTPCREL:
5653 if (!flag_pic)
5654 abort ();
5655 goto is_legitimate_pic;
5657 case UNSPEC_GOTTPOFF:
5658 case UNSPEC_GOTNTPOFF:
5659 case UNSPEC_INDNTPOFF:
5660 case UNSPEC_NTPOFF:
5661 case UNSPEC_DTPOFF:
5662 break;
5664 default:
5665 reason = "invalid address unspec";
5666 goto report_error;
5669 else if (flag_pic && (SYMBOLIC_CONST (disp)
5670 #if TARGET_MACHO
5671 && !machopic_operand_p (disp)
5672 #endif
5675 is_legitimate_pic:
5676 if (TARGET_64BIT && (index || base))
5678 /* foo@dtpoff(%rX) is ok. */
5679 if (GET_CODE (disp) != CONST
5680 || GET_CODE (XEXP (disp, 0)) != PLUS
5681 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5682 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5683 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5684 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5686 reason = "non-constant pic memory reference";
5687 goto report_error;
5690 else if (! legitimate_pic_address_disp_p (disp))
5692 reason = "displacement is an invalid pic construct";
5693 goto report_error;
5696 /* This code used to verify that a symbolic pic displacement
5697 includes the pic_offset_table_rtx register.
5699 While this is a good idea, unfortunately these constructs may
5700 be created by the "adds using lea" optimization for incorrect
5701 code like:
5703 int a;
5704 int foo(int i)
5706 return *(&a+i);
5709 This code is nonsensical, but results in addressing the
5710 GOT table with a pic_offset_table_rtx base. We can't
5711 just refuse it easily, since it gets matched by the
5712 "addsi3" pattern, which later gets split to an lea when the
5713 output register differs from the input. While this
5714 could be handled by a separate addsi pattern for this case
5715 that never results in an lea, disabling this test seems to be the
5716 easier and correct fix for the crash. */
5718 else if (!CONSTANT_ADDRESS_P (disp))
5720 reason = "displacement is not constant";
5721 goto report_error;
5723 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5725 reason = "displacement is out of range";
5726 goto report_error;
5728 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5730 reason = "displacement is a const_double";
5731 goto report_error;
5735 /* Everything looks valid. */
5736 if (TARGET_DEBUG_ADDR)
5737 fprintf (stderr, "Success.\n");
5738 return TRUE;
5740 report_error:
5741 if (TARGET_DEBUG_ADDR)
5743 fprintf (stderr, "Error: %s\n", reason);
5744 debug_rtx (reason_rtx);
5746 return FALSE;
5749 /* Return a unique alias set for the GOT. */
5751 static HOST_WIDE_INT
5752 ix86_GOT_alias_set ()
5754 static HOST_WIDE_INT set = -1;
5755 if (set == -1)
5756 set = new_alias_set ();
5757 return set;
5760 /* Return a legitimate reference for ORIG (an address) using the
5761 register REG. If REG is 0, a new pseudo is generated.
5763 There are two types of references that must be handled:
5765 1. Global data references must load the address from the GOT, via
5766 the PIC reg. An insn is emitted to do this load, and the reg is
5767 returned.
5769 2. Static data references, constant pool addresses, and code labels
5770 compute the address as an offset from the GOT, whose base is in
5771 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5772 differentiate them from global data objects. The returned
5773 address is the PIC reg + an unspec constant.
5775 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5776 reg also appears in the address. */
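/* Illustrative sketch (not part of the original comment): with 32-bit
   PIC, a global symbol `sym' is returned as a load through the GOT,
       (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOT))))
   while a local symbol becomes a direct offset from the GOT base,
       (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)))
   and 64-bit code uses (mem (const (unspec [sym] UNSPEC_GOTPCREL))).  */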
5778 rtx
5779 legitimize_pic_address (orig, reg)
5780 rtx orig;
5781 rtx reg;
5783 rtx addr = orig;
5784 rtx new = orig;
5785 rtx base;
5787 #if TARGET_MACHO
5788 if (reg == 0)
5789 reg = gen_reg_rtx (Pmode);
5790 /* Use the generic Mach-O PIC machinery. */
5791 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5792 #endif
5794 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5795 new = addr;
5796 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5798 /* This symbol may be referenced via a displacement from the PIC
5799 base address (@GOTOFF). */
5801 if (reload_in_progress)
5802 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5803 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5804 new = gen_rtx_CONST (Pmode, new);
5805 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5807 if (reg != 0)
5809 emit_move_insn (reg, new);
5810 new = reg;
5813 else if (GET_CODE (addr) == SYMBOL_REF)
5815 if (TARGET_64BIT)
5817 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5818 new = gen_rtx_CONST (Pmode, new);
5819 new = gen_rtx_MEM (Pmode, new);
5820 RTX_UNCHANGING_P (new) = 1;
5821 set_mem_alias_set (new, ix86_GOT_alias_set ());
5823 if (reg == 0)
5824 reg = gen_reg_rtx (Pmode);
5825 /* Use gen_movsi directly; otherwise the address is loaded
5826 into a register for CSE. We don't want to CSE these addresses;
5827 instead we CSE addresses from the GOT table, so skip this. */
5828 emit_insn (gen_movsi (reg, new));
5829 new = reg;
5831 else
5833 /* This symbol must be referenced via a load from the
5834 Global Offset Table (@GOT). */
5836 if (reload_in_progress)
5837 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5838 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5839 new = gen_rtx_CONST (Pmode, new);
5840 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5841 new = gen_rtx_MEM (Pmode, new);
5842 RTX_UNCHANGING_P (new) = 1;
5843 set_mem_alias_set (new, ix86_GOT_alias_set ());
5845 if (reg == 0)
5846 reg = gen_reg_rtx (Pmode);
5847 emit_move_insn (reg, new);
5848 new = reg;
5851 else
5853 if (GET_CODE (addr) == CONST)
5855 addr = XEXP (addr, 0);
5857 /* We must match stuff we generated before. Assume the only
5858 unspecs that can get here are ours. Not that we could do
5859 anything with them anyway... */
5860 if (GET_CODE (addr) == UNSPEC
5861 || (GET_CODE (addr) == PLUS
5862 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5863 return orig;
5864 if (GET_CODE (addr) != PLUS)
5865 abort ();
5867 if (GET_CODE (addr) == PLUS)
5869 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5871 /* Check first to see if this is a constant offset from a @GOTOFF
5872 symbol reference. */
5873 if (local_symbolic_operand (op0, Pmode)
5874 && GET_CODE (op1) == CONST_INT)
5876 if (!TARGET_64BIT)
5878 if (reload_in_progress)
5879 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5880 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5881 UNSPEC_GOTOFF);
5882 new = gen_rtx_PLUS (Pmode, new, op1);
5883 new = gen_rtx_CONST (Pmode, new);
5884 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5886 if (reg != 0)
5888 emit_move_insn (reg, new);
5889 new = reg;
5892 else
5894 if (INTVAL (op1) < -16*1024*1024
5895 || INTVAL (op1) >= 16*1024*1024)
5896 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5899 else
5901 base = legitimize_pic_address (XEXP (addr, 0), reg);
5902 new = legitimize_pic_address (XEXP (addr, 1),
5903 base == reg ? NULL_RTX : reg);
5905 if (GET_CODE (new) == CONST_INT)
5906 new = plus_constant (base, INTVAL (new));
5907 else
5909 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5911 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5912 new = XEXP (new, 1);
5914 new = gen_rtx_PLUS (Pmode, base, new);
5919 return new;
5922 static void
5923 ix86_encode_section_info (decl, first)
5924 tree decl;
5925 int first ATTRIBUTE_UNUSED;
5927 bool local_p = (*targetm.binds_local_p) (decl);
5928 rtx rtl, symbol;
5930 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5931 if (GET_CODE (rtl) != MEM)
5932 return;
5933 symbol = XEXP (rtl, 0);
5934 if (GET_CODE (symbol) != SYMBOL_REF)
5935 return;
5937 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5938 symbol so that we may access it directly in the GOT. */
5940 if (flag_pic)
5941 SYMBOL_REF_FLAG (symbol) = local_p;
5943 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5944 "local dynamic", "initial exec" or "local exec" TLS models
5945 respectively. */
5947 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5949 const char *symbol_str;
5950 char *newstr;
5951 size_t len;
5952 enum tls_model kind = decl_tls_model (decl);
5954 if (TARGET_64BIT && ! flag_pic)
5956 /* x86-64 doesn't allow non-pic code for shared libraries,
5957 so don't generate GD/LD TLS models for non-pic code. */
5958 switch (kind)
5960 case TLS_MODEL_GLOBAL_DYNAMIC:
5961 kind = TLS_MODEL_INITIAL_EXEC; break;
5962 case TLS_MODEL_LOCAL_DYNAMIC:
5963 kind = TLS_MODEL_LOCAL_EXEC; break;
5964 default:
5965 break;
5969 symbol_str = XSTR (symbol, 0);
5971 if (symbol_str[0] == '%')
5973 if (symbol_str[1] == tls_model_chars[kind])
5974 return;
5975 symbol_str += 2;
5977 len = strlen (symbol_str) + 1;
5978 newstr = alloca (len + 2);
5980 newstr[0] = '%';
5981 newstr[1] = tls_model_chars[kind];
5982 memcpy (newstr + 2, symbol_str, len);
5984 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
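/* For example (following the "%[GLil]" scheme documented above), a
   global-dynamic symbol "foo" is re-encoded as "%Gfoo" and an
   initial-exec one as "%ifoo"; ix86_strip_name_encoding below undoes
   this before the name reaches the assembler.  (Illustrative note.)  */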
5988 /* Undo the above when printing symbol names. */
5990 static const char *
5991 ix86_strip_name_encoding (str)
5992 const char *str;
5994 if (str[0] == '%')
5995 str += 2;
5996 if (str [0] == '*')
5997 str += 1;
5998 return str;
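/* E.g. both "%ifoo" and "%Gfoo" print as "foo", and a leading '*'
   (an assembler-name marker) is dropped as well.  (Illustrative.)  */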
6001 /* Load the thread pointer into a register. */
6003 static rtx
6004 get_thread_pointer ()
6006 rtx tp;
6008 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6009 tp = gen_rtx_MEM (Pmode, tp);
6010 RTX_UNCHANGING_P (tp) = 1;
6011 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6012 tp = force_reg (Pmode, tp);
6014 return tp;
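/* The UNSPEC_TP memory built here is printed by print_operand_address
   below as the literal address %gs:0 (or %fs:0 on 64-bit), i.e. the
   thread pointer is read through the thread segment register.  */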
6017 /* Try machine-dependent ways of modifying an illegitimate address
6018 to be legitimate. If we find one, return the new, valid address.
6019 This macro is used in only one place: `memory_address' in explow.c.
6021 OLDX is the address as it was before break_out_memory_refs was called.
6022 In some cases it is useful to look at this to decide what needs to be done.
6024 MODE and WIN are passed so that this macro can use
6025 GO_IF_LEGITIMATE_ADDRESS.
6027 It is always safe for this macro to do nothing. It exists to recognize
6028 opportunities to optimize the output.
6030 For the 80386, we handle X+REG by loading X into a register R and
6031 using R+REG. R will go in a general reg and indexing will be used.
6032 However, if REG is a broken-out memory address or multiplication,
6033 nothing needs to be done because REG can certainly go in a general reg.
6035 When -fpic is used, special handling is needed for symbolic references.
6036 See comments by legitimize_pic_address in i386.c for details. */
6038 rtx
6039 legitimize_address (x, oldx, mode)
6040 register rtx x;
6041 register rtx oldx ATTRIBUTE_UNUSED;
6042 enum machine_mode mode;
6044 int changed = 0;
6045 unsigned log;
6047 if (TARGET_DEBUG_ADDR)
6049 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6050 GET_MODE_NAME (mode));
6051 debug_rtx (x);
6054 log = tls_symbolic_operand (x, mode);
6055 if (log)
6057 rtx dest, base, off, pic;
6058 int type;
6060 switch (log)
6062 case TLS_MODEL_GLOBAL_DYNAMIC:
6063 dest = gen_reg_rtx (Pmode);
6064 if (TARGET_64BIT)
6066 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6068 start_sequence ();
6069 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6070 insns = get_insns ();
6071 end_sequence ();
6073 emit_libcall_block (insns, dest, rax, x);
6075 else
6076 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6077 break;
6079 case TLS_MODEL_LOCAL_DYNAMIC:
6080 base = gen_reg_rtx (Pmode);
6081 if (TARGET_64BIT)
6083 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6085 start_sequence ();
6086 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6087 insns = get_insns ();
6088 end_sequence ();
6090 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6091 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6092 emit_libcall_block (insns, base, rax, note);
6094 else
6095 emit_insn (gen_tls_local_dynamic_base_32 (base));
6097 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6098 off = gen_rtx_CONST (Pmode, off);
6100 return gen_rtx_PLUS (Pmode, base, off);
6102 case TLS_MODEL_INITIAL_EXEC:
6103 if (TARGET_64BIT)
6105 pic = NULL;
6106 type = UNSPEC_GOTNTPOFF;
6108 else if (flag_pic)
6110 if (reload_in_progress)
6111 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6112 pic = pic_offset_table_rtx;
6113 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6115 else if (!TARGET_GNU_TLS)
6117 pic = gen_reg_rtx (Pmode);
6118 emit_insn (gen_set_got (pic));
6119 type = UNSPEC_GOTTPOFF;
6121 else
6123 pic = NULL;
6124 type = UNSPEC_INDNTPOFF;
6127 base = get_thread_pointer ();
6129 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6130 off = gen_rtx_CONST (Pmode, off);
6131 if (pic)
6132 off = gen_rtx_PLUS (Pmode, pic, off);
6133 off = gen_rtx_MEM (Pmode, off);
6134 RTX_UNCHANGING_P (off) = 1;
6135 set_mem_alias_set (off, ix86_GOT_alias_set ());
6136 dest = gen_reg_rtx (Pmode);
6138 if (TARGET_64BIT || TARGET_GNU_TLS)
6140 emit_move_insn (dest, off);
6141 return gen_rtx_PLUS (Pmode, base, dest);
6143 else
6144 emit_insn (gen_subsi3 (dest, base, off));
6145 break;
6147 case TLS_MODEL_LOCAL_EXEC:
6148 base = get_thread_pointer ();
6150 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6151 (TARGET_64BIT || TARGET_GNU_TLS)
6152 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6153 off = gen_rtx_CONST (Pmode, off);
6155 if (TARGET_64BIT || TARGET_GNU_TLS)
6156 return gen_rtx_PLUS (Pmode, base, off);
6157 else
6159 dest = gen_reg_rtx (Pmode);
6160 emit_insn (gen_subsi3 (dest, base, off));
6162 break;
6164 default:
6165 abort ();
6168 return dest;
6171 if (flag_pic && SYMBOLIC_CONST (x))
6172 return legitimize_pic_address (x, 0);
6174 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6175 if (GET_CODE (x) == ASHIFT
6176 && GET_CODE (XEXP (x, 1)) == CONST_INT
6177 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6179 changed = 1;
6180 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6181 GEN_INT (1 << log));
6184 if (GET_CODE (x) == PLUS)
6186 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6188 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6189 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6190 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6192 changed = 1;
6193 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6194 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6195 GEN_INT (1 << log));
6198 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6199 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6200 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6202 changed = 1;
6203 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6204 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6205 GEN_INT (1 << log));
6208 /* Put multiply first if it isn't already. */
6209 if (GET_CODE (XEXP (x, 1)) == MULT)
6211 rtx tmp = XEXP (x, 0);
6212 XEXP (x, 0) = XEXP (x, 1);
6213 XEXP (x, 1) = tmp;
6214 changed = 1;
6217 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6218 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6219 created by virtual register instantiation, register elimination, and
6220 similar optimizations. */
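/* For example (illustrative), an access like a[i] after elimination
   may look like
       (plus (mult (reg i) (const_int 4)) (plus (reg bp) (const_int -20)))
   and is rewritten below as
       (plus (plus (mult (reg i) (const_int 4)) (reg bp)) (const_int -20))
   which matches the base + index*scale + disp addressing form.  */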
6221 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6223 changed = 1;
6224 x = gen_rtx_PLUS (Pmode,
6225 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6226 XEXP (XEXP (x, 1), 0)),
6227 XEXP (XEXP (x, 1), 1));
6230 /* Canonicalize
6231 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6232 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6233 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6234 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6235 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6236 && CONSTANT_P (XEXP (x, 1)))
6238 rtx constant;
6239 rtx other = NULL_RTX;
6241 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6243 constant = XEXP (x, 1);
6244 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6246 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6248 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6249 other = XEXP (x, 1);
6251 else
6252 constant = 0;
6254 if (constant)
6256 changed = 1;
6257 x = gen_rtx_PLUS (Pmode,
6258 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6259 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6260 plus_constant (other, INTVAL (constant)));
6264 if (changed && legitimate_address_p (mode, x, FALSE))
6265 return x;
6267 if (GET_CODE (XEXP (x, 0)) == MULT)
6269 changed = 1;
6270 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6273 if (GET_CODE (XEXP (x, 1)) == MULT)
6275 changed = 1;
6276 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6279 if (changed
6280 && GET_CODE (XEXP (x, 1)) == REG
6281 && GET_CODE (XEXP (x, 0)) == REG)
6282 return x;
6284 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6286 changed = 1;
6287 x = legitimize_pic_address (x, 0);
6290 if (changed && legitimate_address_p (mode, x, FALSE))
6291 return x;
6293 if (GET_CODE (XEXP (x, 0)) == REG)
6295 register rtx temp = gen_reg_rtx (Pmode);
6296 register rtx val = force_operand (XEXP (x, 1), temp);
6297 if (val != temp)
6298 emit_move_insn (temp, val);
6300 XEXP (x, 1) = temp;
6301 return x;
6304 else if (GET_CODE (XEXP (x, 1)) == REG)
6306 register rtx temp = gen_reg_rtx (Pmode);
6307 register rtx val = force_operand (XEXP (x, 0), temp);
6308 if (val != temp)
6309 emit_move_insn (temp, val);
6311 XEXP (x, 0) = temp;
6312 return x;
6316 return x;
6319 /* Print an integer constant expression in assembler syntax. Addition
6320 and subtraction are the only arithmetic that may appear in these
6321 expressions. FILE is the stdio stream to write to, X is the rtx, and
6322 CODE is the operand print code from the output string. */
6324 static void
6325 output_pic_addr_const (file, x, code)
6326 FILE *file;
6327 rtx x;
6328 int code;
6330 char buf[256];
6332 switch (GET_CODE (x))
6334 case PC:
6335 if (flag_pic)
6336 putc ('.', file);
6337 else
6338 abort ();
6339 break;
6341 case SYMBOL_REF:
6342 assemble_name (file, XSTR (x, 0));
6343 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6344 fputs ("@PLT", file);
6345 break;
6347 case LABEL_REF:
6348 x = XEXP (x, 0);
6349 /* FALLTHRU */
6350 case CODE_LABEL:
6351 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6352 assemble_name (asm_out_file, buf);
6353 break;
6355 case CONST_INT:
6356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6357 break;
6359 case CONST:
6360 /* This used to output parentheses around the expression,
6361 but that does not work on the 386 (either ATT or BSD assembler). */
6362 output_pic_addr_const (file, XEXP (x, 0), code);
6363 break;
6365 case CONST_DOUBLE:
6366 if (GET_MODE (x) == VOIDmode)
6368 /* We can use %d if the number is <32 bits and positive. */
6369 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6370 fprintf (file, "0x%lx%08lx",
6371 (unsigned long) CONST_DOUBLE_HIGH (x),
6372 (unsigned long) CONST_DOUBLE_LOW (x));
6373 else
6374 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6376 else
6377 /* We can't handle floating point constants;
6378 PRINT_OPERAND must handle them. */
6379 output_operand_lossage ("floating constant misused");
6380 break;
6382 case PLUS:
6383 /* Some assemblers need integer constants to appear first. */
6384 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6386 output_pic_addr_const (file, XEXP (x, 0), code);
6387 putc ('+', file);
6388 output_pic_addr_const (file, XEXP (x, 1), code);
6390 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6392 output_pic_addr_const (file, XEXP (x, 1), code);
6393 putc ('+', file);
6394 output_pic_addr_const (file, XEXP (x, 0), code);
6396 else
6397 abort ();
6398 break;
6400 case MINUS:
6401 if (!TARGET_MACHO)
6402 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6403 output_pic_addr_const (file, XEXP (x, 0), code);
6404 putc ('-', file);
6405 output_pic_addr_const (file, XEXP (x, 1), code);
6406 if (!TARGET_MACHO)
6407 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6408 break;
6410 case UNSPEC:
6411 if (XVECLEN (x, 0) != 1)
6412 abort ();
6413 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6414 switch (XINT (x, 1))
6416 case UNSPEC_GOT:
6417 fputs ("@GOT", file);
6418 break;
6419 case UNSPEC_GOTOFF:
6420 fputs ("@GOTOFF", file);
6421 break;
6422 case UNSPEC_GOTPCREL:
6423 fputs ("@GOTPCREL(%rip)", file);
6424 break;
6425 case UNSPEC_GOTTPOFF:
6426 /* FIXME: This might be @TPOFF in Sun ld too. */
6427 fputs ("@GOTTPOFF", file);
6428 break;
6429 case UNSPEC_TPOFF:
6430 fputs ("@TPOFF", file);
6431 break;
6432 case UNSPEC_NTPOFF:
6433 if (TARGET_64BIT)
6434 fputs ("@TPOFF", file);
6435 else
6436 fputs ("@NTPOFF", file);
6437 break;
6438 case UNSPEC_DTPOFF:
6439 fputs ("@DTPOFF", file);
6440 break;
6441 case UNSPEC_GOTNTPOFF:
6442 if (TARGET_64BIT)
6443 fputs ("@GOTTPOFF(%rip)", file);
6444 else
6445 fputs ("@GOTNTPOFF", file);
6446 break;
6447 case UNSPEC_INDNTPOFF:
6448 fputs ("@INDNTPOFF", file);
6449 break;
6450 default:
6451 output_operand_lossage ("invalid UNSPEC as operand");
6452 break;
6454 break;
6456 default:
6457 output_operand_lossage ("invalid expression as operand");
6461 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6462 We need to handle our special PIC relocations. */
6464 void
6465 i386_dwarf_output_addr_const (file, x)
6466 FILE *file;
6467 rtx x;
6469 #ifdef ASM_QUAD
6470 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6471 #else
6472 if (TARGET_64BIT)
6473 abort ();
6474 fprintf (file, "%s", ASM_LONG);
6475 #endif
6476 if (flag_pic)
6477 output_pic_addr_const (file, x, '\0');
6478 else
6479 output_addr_const (file, x);
6480 fputc ('\n', file);
6483 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6484 We need to emit DTP-relative relocations. */
6486 void
6487 i386_output_dwarf_dtprel (file, size, x)
6488 FILE *file;
6489 int size;
6490 rtx x;
6492 fputs (ASM_LONG, file);
6493 output_addr_const (file, x);
6494 fputs ("@DTPOFF", file);
6495 switch (size)
6497 case 4:
6498 break;
6499 case 8:
6500 fputs (", 0", file);
6501 break;
6502 default:
6503 abort ();
6507 /* In the name of slightly smaller debug output, and to cater to
6508 general assembler lossage, recognize PIC+GOTOFF and turn it back
6509 into a direct symbol reference. */
6511 rtx
6512 i386_simplify_dwarf_addr (orig_x)
6513 rtx orig_x;
6515 rtx x = orig_x, y;
6517 if (GET_CODE (x) == MEM)
6518 x = XEXP (x, 0);
6520 if (TARGET_64BIT)
6522 if (GET_CODE (x) != CONST
6523 || GET_CODE (XEXP (x, 0)) != UNSPEC
6524 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6525 || GET_CODE (orig_x) != MEM)
6526 return orig_x;
6527 return XVECEXP (XEXP (x, 0), 0, 0);
6530 if (GET_CODE (x) != PLUS
6531 || GET_CODE (XEXP (x, 1)) != CONST)
6532 return orig_x;
6534 if (GET_CODE (XEXP (x, 0)) == REG
6535 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6536 /* %ebx + GOT/GOTOFF */
6537 y = NULL;
6538 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6540 /* %ebx + %reg * scale + GOT/GOTOFF */
6541 y = XEXP (x, 0);
6542 if (GET_CODE (XEXP (y, 0)) == REG
6543 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6544 y = XEXP (y, 1);
6545 else if (GET_CODE (XEXP (y, 1)) == REG
6546 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6547 y = XEXP (y, 0);
6548 else
6549 return orig_x;
6550 if (GET_CODE (y) != REG
6551 && GET_CODE (y) != MULT
6552 && GET_CODE (y) != ASHIFT)
6553 return orig_x;
6555 else
6556 return orig_x;
6558 x = XEXP (XEXP (x, 1), 0);
6559 if (GET_CODE (x) == UNSPEC
6560 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6561 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6563 if (y)
6564 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6565 return XVECEXP (x, 0, 0);
6568 if (GET_CODE (x) == PLUS
6569 && GET_CODE (XEXP (x, 0)) == UNSPEC
6570 && GET_CODE (XEXP (x, 1)) == CONST_INT
6571 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6572 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6573 && GET_CODE (orig_x) != MEM)))
6575 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6576 if (y)
6577 return gen_rtx_PLUS (Pmode, y, x);
6578 return x;
6581 return orig_x;
6584 static void
6585 put_condition_code (code, mode, reverse, fp, file)
6586 enum rtx_code code;
6587 enum machine_mode mode;
6588 int reverse, fp;
6589 FILE *file;
6591 const char *suffix;
6593 if (mode == CCFPmode || mode == CCFPUmode)
6595 enum rtx_code second_code, bypass_code;
6596 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6597 if (bypass_code != NIL || second_code != NIL)
6598 abort ();
6599 code = ix86_fp_compare_code_to_integer (code);
6600 mode = CCmode;
6602 if (reverse)
6603 code = reverse_condition (code);
6605 switch (code)
6607 case EQ:
6608 suffix = "e";
6609 break;
6610 case NE:
6611 suffix = "ne";
6612 break;
6613 case GT:
6614 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6615 abort ();
6616 suffix = "g";
6617 break;
6618 case GTU:
6619 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6620 Those same assemblers have the same but opposite lossage on cmov. */
6621 if (mode != CCmode)
6622 abort ();
6623 suffix = fp ? "nbe" : "a";
6624 break;
6625 case LT:
6626 if (mode == CCNOmode || mode == CCGOCmode)
6627 suffix = "s";
6628 else if (mode == CCmode || mode == CCGCmode)
6629 suffix = "l";
6630 else
6631 abort ();
6632 break;
6633 case LTU:
6634 if (mode != CCmode)
6635 abort ();
6636 suffix = "b";
6637 break;
6638 case GE:
6639 if (mode == CCNOmode || mode == CCGOCmode)
6640 suffix = "ns";
6641 else if (mode == CCmode || mode == CCGCmode)
6642 suffix = "ge";
6643 else
6644 abort ();
6645 break;
6646 case GEU:
6647 /* ??? As above. */
6648 if (mode != CCmode)
6649 abort ();
6650 suffix = fp ? "nb" : "ae";
6651 break;
6652 case LE:
6653 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6654 abort ();
6655 suffix = "le";
6656 break;
6657 case LEU:
6658 if (mode != CCmode)
6659 abort ();
6660 suffix = "be";
6661 break;
6662 case UNORDERED:
6663 suffix = fp ? "u" : "p";
6664 break;
6665 case ORDERED:
6666 suffix = fp ? "nu" : "np";
6667 break;
6668 default:
6669 abort ();
6671 fputs (suffix, file);
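/* Example (illustrative): (GT, CCmode) emits "g"; with REVERSE set the
   condition becomes LE and "le" is emitted, while an unsigned compare
   (GTU, CCmode) emits "a" ("nbe" in the fp/fcmov case noted above).  */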
6674 void
6675 print_reg (x, code, file)
6676 rtx x;
6677 int code;
6678 FILE *file;
6680 if (REGNO (x) == ARG_POINTER_REGNUM
6681 || REGNO (x) == FRAME_POINTER_REGNUM
6682 || REGNO (x) == FLAGS_REG
6683 || REGNO (x) == FPSR_REG)
6684 abort ();
6686 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6687 putc ('%', file);
6689 if (code == 'w' || MMX_REG_P (x))
6690 code = 2;
6691 else if (code == 'b')
6692 code = 1;
6693 else if (code == 'k')
6694 code = 4;
6695 else if (code == 'q')
6696 code = 8;
6697 else if (code == 'y')
6698 code = 3;
6699 else if (code == 'h')
6700 code = 0;
6701 else
6702 code = GET_MODE_SIZE (GET_MODE (x));
6704 /* Irritatingly, AMD extended registers use a different naming
6705 convention from the normal registers. */
6706 if (REX_INT_REG_P (x))
6708 if (!TARGET_64BIT)
6709 abort ();
6710 switch (code)
6712 case 0:
6713 error ("extended registers have no high halves");
6714 break;
6715 case 1:
6716 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6717 break;
6718 case 2:
6719 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6720 break;
6721 case 4:
6722 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6723 break;
6724 case 8:
6725 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6726 break;
6727 default:
6728 error ("unsupported operand size for extended register");
6729 break;
6731 return;
6733 switch (code)
6735 case 3:
6736 if (STACK_TOP_P (x))
6738 fputs ("st(0)", file);
6739 break;
6741 /* FALLTHRU */
6742 case 8:
6743 case 4:
6744 case 12:
6745 if (! ANY_FP_REG_P (x))
6746 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6747 /* FALLTHRU */
6748 case 16:
6749 case 2:
6750 fputs (hi_reg_name[REGNO (x)], file);
6751 break;
6752 case 1:
6753 fputs (qi_reg_name[REGNO (x)], file);
6754 break;
6755 case 0:
6756 fputs (qi_high_reg_name[REGNO (x)], file);
6757 break;
6758 default:
6759 abort ();
6763 /* Locate some local-dynamic symbol still in use by this function
6764 so that we can print its name in some tls_local_dynamic_base
6765 pattern. */
6767 static const char *
6768 get_some_local_dynamic_name ()
6770 rtx insn;
6772 if (cfun->machine->some_ld_name)
6773 return cfun->machine->some_ld_name;
6775 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6776 if (INSN_P (insn)
6777 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6778 return cfun->machine->some_ld_name;
6780 abort ();
6783 static int
6784 get_some_local_dynamic_name_1 (px, data)
6785 rtx *px;
6786 void *data ATTRIBUTE_UNUSED;
6788 rtx x = *px;
6790 if (GET_CODE (x) == SYMBOL_REF
6791 && local_dynamic_symbolic_operand (x, Pmode))
6793 cfun->machine->some_ld_name = XSTR (x, 0);
6794 return 1;
6797 return 0;
6800 /* Meaning of CODE:
6801 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6802 C -- print opcode suffix for set/cmov insn.
6803 c -- like C, but print reversed condition
6804 F,f -- likewise, but for floating-point.
6805 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6806 otherwise nothing
6807 R -- print the prefix for register names.
6808 z -- print the opcode suffix for the size of the current operand.
6809 * -- print a star (in certain assembler syntax)
6810 A -- print an absolute memory reference.
6811 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6812 s -- print a shift double count, followed by the assembler's argument
6813 delimiter.
6814 b -- print the QImode name of the register for the indicated operand.
6815 %b0 would print %al if operands[0] is reg 0.
6816 w -- likewise, print the HImode name of the register.
6817 k -- likewise, print the SImode name of the register.
6818 q -- likewise, print the DImode name of the register.
6819 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6820 y -- print "st(0)" instead of "st" as a register.
6821 D -- print condition for SSE cmp instruction.
6822 P -- if PIC, print an @PLT suffix.
6823 X -- don't print any sort of PIC '@' suffix for a symbol.
6824 & -- print some in-use local-dynamic symbol name.
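/* For instance (illustrative), with operands[0] = (reg:SI 0):
   %b0 -> %al, %w0 -> %ax, %k0 -> %eax, %h0 -> %ah; and %z1 on an
   SFmode memory operand adds the 's' suffix (e.g. "flds").  */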
6827 void
6828 print_operand (file, x, code)
6829 FILE *file;
6830 rtx x;
6831 int code;
6833 if (code)
6835 switch (code)
6837 case '*':
6838 if (ASSEMBLER_DIALECT == ASM_ATT)
6839 putc ('*', file);
6840 return;
6842 case '&':
6843 assemble_name (file, get_some_local_dynamic_name ());
6844 return;
6846 case 'A':
6847 if (ASSEMBLER_DIALECT == ASM_ATT)
6848 putc ('*', file);
6849 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6851 /* Intel syntax. For absolute addresses, registers should not
6852 be surrounded by braces. */
6853 if (GET_CODE (x) != REG)
6855 putc ('[', file);
6856 PRINT_OPERAND (file, x, 0);
6857 putc (']', file);
6858 return;
6861 else
6862 abort ();
6864 PRINT_OPERAND (file, x, 0);
6865 return;
6868 case 'L':
6869 if (ASSEMBLER_DIALECT == ASM_ATT)
6870 putc ('l', file);
6871 return;
6873 case 'W':
6874 if (ASSEMBLER_DIALECT == ASM_ATT)
6875 putc ('w', file);
6876 return;
6878 case 'B':
6879 if (ASSEMBLER_DIALECT == ASM_ATT)
6880 putc ('b', file);
6881 return;
6883 case 'Q':
6884 if (ASSEMBLER_DIALECT == ASM_ATT)
6885 putc ('l', file);
6886 return;
6888 case 'S':
6889 if (ASSEMBLER_DIALECT == ASM_ATT)
6890 putc ('s', file);
6891 return;
6893 case 'T':
6894 if (ASSEMBLER_DIALECT == ASM_ATT)
6895 putc ('t', file);
6896 return;
6898 case 'z':
6899 /* 387 opcodes don't get size suffixes if the operands are
6900 registers. */
6901 if (STACK_REG_P (x))
6902 return;
6904 /* Likewise if using Intel opcodes. */
6905 if (ASSEMBLER_DIALECT == ASM_INTEL)
6906 return;
6908 /* The opcode size suffix is derived from the size of the operand. */
6909 switch (GET_MODE_SIZE (GET_MODE (x)))
6911 case 2:
6912 #ifdef HAVE_GAS_FILDS_FISTS
6913 putc ('s', file);
6914 #endif
6915 return;
6917 case 4:
6918 if (GET_MODE (x) == SFmode)
6920 putc ('s', file);
6921 return;
6923 else
6924 putc ('l', file);
6925 return;
6927 case 12:
6928 case 16:
6929 putc ('t', file);
6930 return;
6932 case 8:
6933 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6935 #ifdef GAS_MNEMONICS
6936 putc ('q', file);
6937 #else
6938 putc ('l', file);
6939 putc ('l', file);
6940 #endif
6942 else
6943 putc ('l', file);
6944 return;
6946 default:
6947 abort ();
6950 case 'b':
6951 case 'w':
6952 case 'k':
6953 case 'q':
6954 case 'h':
6955 case 'y':
6956 case 'X':
6957 case 'P':
6958 break;
6960 case 's':
6961 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6963 PRINT_OPERAND (file, x, 0);
6964 putc (',', file);
6966 return;
6968 case 'D':
6969 /* A little bit of brain damage here. The SSE compare instructions
6970 use completely different names for the comparisons than the
6971 fp conditional moves do. */
6972 switch (GET_CODE (x))
6974 case EQ:
6975 case UNEQ:
6976 fputs ("eq", file);
6977 break;
6978 case LT:
6979 case UNLT:
6980 fputs ("lt", file);
6981 break;
6982 case LE:
6983 case UNLE:
6984 fputs ("le", file);
6985 break;
6986 case UNORDERED:
6987 fputs ("unord", file);
6988 break;
6989 case NE:
6990 case LTGT:
6991 fputs ("neq", file);
6992 break;
6993 case UNGE:
6994 case GE:
6995 fputs ("nlt", file);
6996 break;
6997 case UNGT:
6998 case GT:
6999 fputs ("nle", file);
7000 break;
7001 case ORDERED:
7002 fputs ("ord", file);
7003 break;
7004 default:
7005 abort ();
7006 break;
7008 return;
7009 case 'O':
7010 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7011 if (ASSEMBLER_DIALECT == ASM_ATT)
7013 switch (GET_MODE (x))
7015 case HImode: putc ('w', file); break;
7016 case SImode:
7017 case SFmode: putc ('l', file); break;
7018 case DImode:
7019 case DFmode: putc ('q', file); break;
7020 default: abort ();
7022 putc ('.', file);
7024 #endif
7025 return;
7026 case 'C':
7027 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7028 return;
7029 case 'F':
7030 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7031 if (ASSEMBLER_DIALECT == ASM_ATT)
7032 putc ('.', file);
7033 #endif
7034 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7035 return;
7037 /* Like above, but reverse condition */
7038 case 'c':
7039 /* Check to see if argument to %c is really a constant
7040 and not a condition code which needs to be reversed. */
7041 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7043 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7044 return;
7046 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7047 return;
7048 case 'f':
7049 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7050 if (ASSEMBLER_DIALECT == ASM_ATT)
7051 putc ('.', file);
7052 #endif
7053 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7054 return;
7055 case '+':
7057 rtx x;
7059 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7060 return;
7062 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7063 if (x)
7065 int pred_val = INTVAL (XEXP (x, 0));
7067 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7068 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7070 int taken = pred_val > REG_BR_PROB_BASE / 2;
7071 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7073 /* Emit hints only when the default branch prediction
7074 heuristics would fail. */
7075 if (taken != cputaken)
7077 /* We use 3e (DS) prefix for taken branches and
7078 2e (CS) prefix for not taken branches. */
7079 if (taken)
7080 fputs ("ds ; ", file);
7081 else
7082 fputs ("cs ; ", file);
7086 return;
7088 default:
7089 output_operand_lossage ("invalid operand code `%c'", code);
7093 if (GET_CODE (x) == REG)
7095 PRINT_REG (x, code, file);
7098 else if (GET_CODE (x) == MEM)
7100 /* No `byte ptr' prefix for call instructions. */
7101 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7103 const char * size;
7104 switch (GET_MODE_SIZE (GET_MODE (x)))
7106 case 1: size = "BYTE"; break;
7107 case 2: size = "WORD"; break;
7108 case 4: size = "DWORD"; break;
7109 case 8: size = "QWORD"; break;
7110 case 12: size = "XWORD"; break;
7111 case 16: size = "XMMWORD"; break;
7112 default:
7113 abort ();
7116 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7117 if (code == 'b')
7118 size = "BYTE";
7119 else if (code == 'w')
7120 size = "WORD";
7121 else if (code == 'k')
7122 size = "DWORD";
7124 fputs (size, file);
7125 fputs (" PTR ", file);
7128 x = XEXP (x, 0);
7129 if (flag_pic && CONSTANT_ADDRESS_P (x))
7130 output_pic_addr_const (file, x, code);
7131 /* Avoid (%rip) for call operands. */
7132 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7133 && GET_CODE (x) != CONST_INT)
7134 output_addr_const (file, x);
7135 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7136 output_operand_lossage ("invalid constraints for operand");
7137 else
7138 output_address (x);
7141 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7143 REAL_VALUE_TYPE r;
7144 long l;
7146 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7147 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7149 if (ASSEMBLER_DIALECT == ASM_ATT)
7150 putc ('$', file);
7151 fprintf (file, "0x%lx", l);
7154 /* These float cases don't actually occur as immediate operands. */
7155 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7157 char dstr[30];
7159 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7160 fprintf (file, "%s", dstr);
7163 else if (GET_CODE (x) == CONST_DOUBLE
7164 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7166 char dstr[30];
7168 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7169 fprintf (file, "%s", dstr);
7172 else
7174 if (code != 'P')
7176 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7178 if (ASSEMBLER_DIALECT == ASM_ATT)
7179 putc ('$', file);
7181 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7182 || GET_CODE (x) == LABEL_REF)
7184 if (ASSEMBLER_DIALECT == ASM_ATT)
7185 putc ('$', file);
7186 else
7187 fputs ("OFFSET FLAT:", file);
7190 if (GET_CODE (x) == CONST_INT)
7191 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7192 else if (flag_pic)
7193 output_pic_addr_const (file, x, code);
7194 else
7195 output_addr_const (file, x);
7199 /* Print a memory operand whose address is ADDR. */
7201 void
7202 print_operand_address (file, addr)
7203 FILE *file;
7204 register rtx addr;
7206 struct ix86_address parts;
7207 rtx base, index, disp;
7208 int scale;
7210 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7212 if (ASSEMBLER_DIALECT == ASM_INTEL)
7213 fputs ("DWORD PTR ", file);
7214 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7215 putc ('%', file);
7216 if (TARGET_64BIT)
7217 fputs ("fs:0", file);
7218 else
7219 fputs ("gs:0", file);
7220 return;
7223 if (! ix86_decompose_address (addr, &parts))
7224 abort ();
7226 base = parts.base;
7227 index = parts.index;
7228 disp = parts.disp;
7229 scale = parts.scale;
7231 if (!base && !index)
7233 /* A displacement-only address requires special attention. */
7235 if (GET_CODE (disp) == CONST_INT)
7237 if (ASSEMBLER_DIALECT == ASM_INTEL)
7239 if (USER_LABEL_PREFIX[0] == 0)
7240 putc ('%', file);
7241 fputs ("ds:", file);
7243 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7245 else if (flag_pic)
7246 output_pic_addr_const (file, addr, 0);
7247 else
7248 output_addr_const (file, addr);
7250 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
7251 if (TARGET_64BIT
7252 && ((GET_CODE (addr) == SYMBOL_REF
7253 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7254 || GET_CODE (addr) == LABEL_REF
7255 || (GET_CODE (addr) == CONST
7256 && GET_CODE (XEXP (addr, 0)) == PLUS
7257 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7258 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7259 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7260 fputs ("(%rip)", file);
7262 else
7264 if (ASSEMBLER_DIALECT == ASM_ATT)
7266 if (disp)
7268 if (flag_pic)
7269 output_pic_addr_const (file, disp, 0);
7270 else if (GET_CODE (disp) == LABEL_REF)
7271 output_asm_label (disp);
7272 else
7273 output_addr_const (file, disp);
7276 putc ('(', file);
7277 if (base)
7278 PRINT_REG (base, 0, file);
7279 if (index)
7281 putc (',', file);
7282 PRINT_REG (index, 0, file);
7283 if (scale != 1)
7284 fprintf (file, ",%d", scale);
7286 putc (')', file);
7288 else
7290 rtx offset = NULL_RTX;
7292 if (disp)
7294 /* Pull out the offset of a symbol; print any symbol itself. */
7295 if (GET_CODE (disp) == CONST
7296 && GET_CODE (XEXP (disp, 0)) == PLUS
7297 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7299 offset = XEXP (XEXP (disp, 0), 1);
7300 disp = gen_rtx_CONST (VOIDmode,
7301 XEXP (XEXP (disp, 0), 0));
7304 if (flag_pic)
7305 output_pic_addr_const (file, disp, 0);
7306 else if (GET_CODE (disp) == LABEL_REF)
7307 output_asm_label (disp);
7308 else if (GET_CODE (disp) == CONST_INT)
7309 offset = disp;
7310 else
7311 output_addr_const (file, disp);
7314 putc ('[', file);
7315 if (base)
7317 PRINT_REG (base, 0, file);
7318 if (offset)
7320 if (INTVAL (offset) >= 0)
7321 putc ('+', file);
7322 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7325 else if (offset)
7326 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7327 else
7328 putc ('0', file);
7330 if (index)
7332 putc ('+', file);
7333 PRINT_REG (index, 0, file);
7334 if (scale != 1)
7335 fprintf (file, "*%d", scale);
7337 putc (']', file);
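/* Sketch of the two dialects for base %ebx, index %esi, scale 4,
   displacement 8: the AT&T branch prints "8(%ebx,%esi,4)", while the
   Intel branch prints "[ebx+8+esi*4]" (the offset is folded next to
   the base, as the code above shows).  (Illustrative.)  */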
7342 bool
7343 output_addr_const_extra (file, x)
7344 FILE *file;
7345 rtx x;
7347 rtx op;
7349 if (GET_CODE (x) != UNSPEC)
7350 return false;
7352 op = XVECEXP (x, 0, 0);
7353 switch (XINT (x, 1))
7355 case UNSPEC_GOTTPOFF:
7356 output_addr_const (file, op);
7357 /* FIXME: This might be @TPOFF in Sun ld. */
7358 fputs ("@GOTTPOFF", file);
7359 break;
7360 case UNSPEC_TPOFF:
7361 output_addr_const (file, op);
7362 fputs ("@TPOFF", file);
7363 break;
7364 case UNSPEC_NTPOFF:
7365 output_addr_const (file, op);
7366 if (TARGET_64BIT)
7367 fputs ("@TPOFF", file);
7368 else
7369 fputs ("@NTPOFF", file);
7370 break;
7371 case UNSPEC_DTPOFF:
7372 output_addr_const (file, op);
7373 fputs ("@DTPOFF", file);
7374 break;
7375 case UNSPEC_GOTNTPOFF:
7376 output_addr_const (file, op);
7377 if (TARGET_64BIT)
7378 fputs ("@GOTTPOFF(%rip)", file);
7379 else
7380 fputs ("@GOTNTPOFF", file);
7381 break;
7382 case UNSPEC_INDNTPOFF:
7383 output_addr_const (file, op);
7384 fputs ("@INDNTPOFF", file);
7385 break;
7387 default:
7388 return false;
7391 return true;
7394 /* Split one or more DImode RTL references into pairs of SImode
7395 references. The RTL can be REG, offsettable MEM, integer constant, or
7396 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7397 split and "num" is its length. lo_half and hi_half are output arrays
7398 that parallel "operands". */
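/* For example (illustrative), splitting the DImode constant
   0x0000000100000002 yields lo_half = (const_int 2) and
   hi_half = (const_int 1), matching the little-endian layout that
   the SImode offsets 0 and 4 select for memory operands.  */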
7400 void
7401 split_di (operands, num, lo_half, hi_half)
7402 rtx operands[];
7403 int num;
7404 rtx lo_half[], hi_half[];
7406 while (num--)
7408 rtx op = operands[num];
7410 /* simplify_subreg refuses to split volatile memory addresses,
7411 but we still have to handle them. */
7412 if (GET_CODE (op) == MEM)
7414 lo_half[num] = adjust_address (op, SImode, 0);
7415 hi_half[num] = adjust_address (op, SImode, 4);
7417 else
7419 lo_half[num] = simplify_gen_subreg (SImode, op,
7420 GET_MODE (op) == VOIDmode
7421 ? DImode : GET_MODE (op), 0);
7422 hi_half[num] = simplify_gen_subreg (SImode, op,
7423 GET_MODE (op) == VOIDmode
7424 ? DImode : GET_MODE (op), 4);
7428 /* Split one or more TImode RTL references into pairs of DImode
7429 references. The RTL can be REG, offsettable MEM, integer constant, or
7430 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7431 split and "num" is its length. lo_half and hi_half are output arrays
7432 that parallel "operands". */
7434 void
7435 split_ti (operands, num, lo_half, hi_half)
7436 rtx operands[];
7437 int num;
7438 rtx lo_half[], hi_half[];
7440 while (num--)
7442 rtx op = operands[num];
7444 /* simplify_subreg refuses to split volatile memory addresses, but we
7445 still have to handle them. */
7446 if (GET_CODE (op) == MEM)
7448 lo_half[num] = adjust_address (op, DImode, 0);
7449 hi_half[num] = adjust_address (op, DImode, 8);
7451 else
7453 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7454 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7459 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7460 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7461 is the expression of the binary operation. The output may either be
7462 emitted here, or returned to the caller, like all output_* functions.
7464 There is no guarantee that the operands are the same mode, as they
7465 might be within FLOAT or FLOAT_EXTEND expressions. */
7467 #ifndef SYSV386_COMPAT
7468 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7469 wants to fix the assemblers because that causes incompatibility
7470 with gcc. No-one wants to fix gcc because that causes
7471 incompatibility with assemblers... You can use the option of
7472 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7473 #define SYSV386_COMPAT 1
7474 #endif
7476 const char *
7477 output_387_binary_op (insn, operands)
7478 rtx insn;
7479 rtx *operands;
7481 static char buf[30];
7482 const char *p;
7483 const char *ssep;
7484 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7486 #ifdef ENABLE_CHECKING
7487 /* Even if we do not want to check the inputs, this documents the input
7488 constraints, which helps in understanding the following code. */
7489 if (STACK_REG_P (operands[0])
7490 && ((REG_P (operands[1])
7491 && REGNO (operands[0]) == REGNO (operands[1])
7492 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7493 || (REG_P (operands[2])
7494 && REGNO (operands[0]) == REGNO (operands[2])
7495 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7496 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7497 ; /* ok */
7498 else if (!is_sse)
7499 abort ();
7500 #endif
7502 switch (GET_CODE (operands[3]))
7504 case PLUS:
7505 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7506 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7507 p = "fiadd";
7508 else
7509 p = "fadd";
7510 ssep = "add";
7511 break;
7513 case MINUS:
7514 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7515 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7516 p = "fisub";
7517 else
7518 p = "fsub";
7519 ssep = "sub";
7520 break;
7522 case MULT:
7523 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7524 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7525 p = "fimul";
7526 else
7527 p = "fmul";
7528 ssep = "mul";
7529 break;
7531 case DIV:
7532 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7533 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7534 p = "fidiv";
7535 else
7536 p = "fdiv";
7537 ssep = "div";
7538 break;
7540 default:
7541 abort ();
7544 if (is_sse)
7546 strcpy (buf, ssep);
7547 if (GET_MODE (operands[0]) == SFmode)
7548 strcat (buf, "ss\t{%2, %0|%0, %2}");
7549 else
7550 strcat (buf, "sd\t{%2, %0|%0, %2}");
7551 return buf;
7553 strcpy (buf, p);
7555 switch (GET_CODE (operands[3]))
7557 case MULT:
7558 case PLUS:
7559 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7561 rtx temp = operands[2];
7562 operands[2] = operands[1];
7563 operands[1] = temp;
7566 /* We know operands[0] == operands[1]. */
7568 if (GET_CODE (operands[2]) == MEM)
7570 p = "%z2\t%2";
7571 break;
7574 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7576 if (STACK_TOP_P (operands[0]))
7577 /* How is it that we are storing to a dead operand[2]?
7578 Well, presumably operands[1] is dead too. We can't
7579 store the result to st(0) as st(0) gets popped on this
7580 instruction. Instead store to operands[2] (which I
7581 think has to be st(1)). st(1) will be popped later.
7582 gcc <= 2.8.1 didn't have this check and generated
7583 assembly code that the UnixWare assembler rejected. */
7584 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7585 else
7586 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7587 break;
7590 if (STACK_TOP_P (operands[0]))
7591 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7592 else
7593 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7594 break;
7596 case MINUS:
7597 case DIV:
7598 if (GET_CODE (operands[1]) == MEM)
7600 p = "r%z1\t%1";
7601 break;
7604 if (GET_CODE (operands[2]) == MEM)
7606 p = "%z2\t%2";
7607 break;
7610 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7612 #if SYSV386_COMPAT
7613 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7614 derived assemblers, confusingly reverse the direction of
7615 the operation for fsub{r} and fdiv{r} when the
7616 destination register is not st(0). The Intel assembler
7617 doesn't have this brain damage. Read !SYSV386_COMPAT to
7618 figure out what the hardware really does. */
7619 if (STACK_TOP_P (operands[0]))
7620 p = "{p\t%0, %2|rp\t%2, %0}";
7621 else
7622 p = "{rp\t%2, %0|p\t%0, %2}";
7623 #else
7624 if (STACK_TOP_P (operands[0]))
7625 /* As above for fmul/fadd, we can't store to st(0). */
7626 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7627 else
7628 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7629 #endif
7630 break;
7633 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7635 #if SYSV386_COMPAT
7636 if (STACK_TOP_P (operands[0]))
7637 p = "{rp\t%0, %1|p\t%1, %0}";
7638 else
7639 p = "{p\t%1, %0|rp\t%0, %1}";
7640 #else
7641 if (STACK_TOP_P (operands[0]))
7642 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7643 else
7644 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7645 #endif
7646 break;
7649 if (STACK_TOP_P (operands[0]))
7651 if (STACK_TOP_P (operands[1]))
7652 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7653 else
7654 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7655 break;
7657 else if (STACK_TOP_P (operands[1]))
7659 #if SYSV386_COMPAT
7660 p = "{\t%1, %0|r\t%0, %1}";
7661 #else
7662 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7663 #endif
7665 else
7667 #if SYSV386_COMPAT
7668 p = "{r\t%2, %0|\t%0, %2}";
7669 #else
7670 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7671 #endif
7673 break;
7675 default:
7676 abort ();
7679 strcat (buf, p);
7680 return buf;
7683 /* Output code to initialize the control word copies used by the
7684 trunc?f?i patterns. NORMAL is set to the current control word, while
7685 ROUND_DOWN is set to a control word that rounds toward zero. */
7686 void
7687 emit_i387_cw_initialization (normal, round_down)
7688 rtx normal, round_down;
7690 rtx reg = gen_reg_rtx (HImode);
7692 emit_insn (gen_x86_fnstcw_1 (normal));
7693 emit_move_insn (reg, normal);
7694 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7695 && !TARGET_64BIT)
7696 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7697 else
7698 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7699 emit_move_insn (round_down, reg);
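/* Note (illustrative): bits 10-11 of the i387 control word form the
   rounding-control field; or-ing in 0xc00 (the insv variant stores
   0x0c into the high byte to the same effect) sets RC = 11b, i.e.
   round toward zero, which is what the truncating patterns need.  */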
7702 /* Output code for INSN to convert a float to a signed int. OPERANDS
7703 are the insn operands. The output may be [HSD]Imode and the input
7704 operand may be [SDX]Fmode. */
7706 const char *
7707 output_fix_trunc (insn, operands)
7708 rtx insn;
7709 rtx *operands;
7711 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7712 int dimode_p = GET_MODE (operands[0]) == DImode;
7714 /* Jump through a hoop or two for DImode, since the hardware has no
7715 non-popping instruction. We used to do this a different way, but
7716 that was somewhat fragile and broke with post-reload splitters. */
7717 if (dimode_p && !stack_top_dies)
7718 output_asm_insn ("fld\t%y1", operands);
7720 if (!STACK_TOP_P (operands[1]))
7721 abort ();
7723 if (GET_CODE (operands[0]) != MEM)
7724 abort ();
7726 output_asm_insn ("fldcw\t%3", operands);
7727 if (stack_top_dies || dimode_p)
7728 output_asm_insn ("fistp%z0\t%0", operands);
7729 else
7730 output_asm_insn ("fist%z0\t%0", operands);
7731 output_asm_insn ("fldcw\t%2", operands);
7733 return "";
7736 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7737 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7738 when fucom should be used. */
7740 const char *
7741 output_fp_compare (insn, operands, eflags_p, unordered_p)
7742 rtx insn;
7743 rtx *operands;
7744 int eflags_p, unordered_p;
7746 int stack_top_dies;
7747 rtx cmp_op0 = operands[0];
7748 rtx cmp_op1 = operands[1];
7749 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7751 if (eflags_p == 2)
7753 cmp_op0 = cmp_op1;
7754 cmp_op1 = operands[2];
7756 if (is_sse)
7758 if (GET_MODE (operands[0]) == SFmode)
7759 if (unordered_p)
7760 return "ucomiss\t{%1, %0|%0, %1}";
7761 else
7762 return "comiss\t{%1, %0|%0, %1}";
7763 else
7764 if (unordered_p)
7765 return "ucomisd\t{%1, %0|%0, %1}";
7766 else
7767 return "comisd\t{%1, %0|%0, %1}";
7770 if (! STACK_TOP_P (cmp_op0))
7771 abort ();
7773 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7775 if (STACK_REG_P (cmp_op1)
7776 && stack_top_dies
7777 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7778 && REGNO (cmp_op1) != FIRST_STACK_REG)
7780 /* If the top of the 387 stack dies, and the other operand
7781 is also a stack register that dies, then this must be an
7782 `fcompp' float compare
7784 if (eflags_p == 1)
7786 /* There is no double popping fcomi variant. Fortunately,
7787 eflags is immune from the fstp's cc clobbering. */
7788 if (unordered_p)
7789 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7790 else
7791 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7792 return "fstp\t%y0";
7794 else
7796 if (eflags_p == 2)
7798 if (unordered_p)
7799 return "fucompp\n\tfnstsw\t%0";
7800 else
7801 return "fcompp\n\tfnstsw\t%0";
7803 else
7805 if (unordered_p)
7806 return "fucompp";
7807 else
7808 return "fcompp";
7812 else
7814 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7816 static const char * const alt[24] =
7818 "fcom%z1\t%y1",
7819 "fcomp%z1\t%y1",
7820 "fucom%z1\t%y1",
7821 "fucomp%z1\t%y1",
7823 "ficom%z1\t%y1",
7824 "ficomp%z1\t%y1",
7825 NULL,
7826 NULL,
7828 "fcomi\t{%y1, %0|%0, %y1}",
7829 "fcomip\t{%y1, %0|%0, %y1}",
7830 "fucomi\t{%y1, %0|%0, %y1}",
7831 "fucomip\t{%y1, %0|%0, %y1}",
7833 NULL,
7834 NULL,
7835 NULL,
7836 NULL,
7838 "fcom%z2\t%y2\n\tfnstsw\t%0",
7839 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7840 "fucom%z2\t%y2\n\tfnstsw\t%0",
7841 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7843 "ficom%z2\t%y2\n\tfnstsw\t%0",
7844 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7845 NULL,
7846 NULL
7849 int mask;
7850 const char *ret;
7852 mask = eflags_p << 3;
7853 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7854 mask |= unordered_p << 1;
7855 mask |= stack_top_dies;
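/* Worked example (illustrative): fcomi requested (eflags_p == 1),
   fp operands, unordered compare, stack top dies:
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11 -> "fucomip".  */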
7857 if (mask >= 24)
7858 abort ();
7859 ret = alt[mask];
7860 if (ret == NULL)
7861 abort ();
7863 return ret;
7867 void
7868 ix86_output_addr_vec_elt (file, value)
7869 FILE *file;
7870 int value;
7872 const char *directive = ASM_LONG;
7874 if (TARGET_64BIT)
7876 #ifdef ASM_QUAD
7877 directive = ASM_QUAD;
7878 #else
7879 abort ();
7880 #endif
7883 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7886 void
7887 ix86_output_addr_diff_elt (file, value, rel)
7888 FILE *file;
7889 int value, rel;
7891 if (TARGET_64BIT)
7892 fprintf (file, "%s%s%d-%s%d\n",
7893 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7894 else if (HAVE_AS_GOTOFF_IN_DATA)
7895 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7896 #if TARGET_MACHO
7897 else if (TARGET_MACHO)
7898 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7899 machopic_function_base_name () + 1);
7900 #endif
7901 else
7902 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7903 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
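/* Assuming ASM_LONG expands to ".long" and LPREFIX to ".L", the
   32-bit GOTOFF-in-data case above emits, e.g., ".long .L3@GOTOFF"
   for value == 3; the exact spelling comes from the target macros.
   (Illustrative.)  */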
7906 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7907 for the target. */
7909 void
7910 ix86_expand_clear (dest)
7911 rtx dest;
7913 rtx tmp;
7915 /* We play register width games, which are only valid after reload. */
7916 if (!reload_completed)
7917 abort ();
7919 /* Avoid HImode and its attendant prefix byte. */
7920 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7921 dest = gen_rtx_REG (SImode, REGNO (dest));
7923 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7925 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7926 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7928 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7929 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7932 emit_insn (tmp);
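/* E.g. clearing %eax emits (set (reg:SI 0) (const_int 0)) together
   with a flags clobber, which the movsi_xor pattern renders as
   "xor{l} %eax, %eax"; targets preferring mov0 (when not optimizing
   for size) get "mov{l} $0, %eax" without the clobber.  (Illustrative.)  */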
7935 /* X is an unchanging MEM. If it is a constant pool reference, return
7936 the constant pool rtx, else NULL. */
7938 static rtx
7939 maybe_get_pool_constant (x)
7940 rtx x;
7942 x = XEXP (x, 0);
7944 if (flag_pic && ! TARGET_64BIT)
7946 if (GET_CODE (x) != PLUS)
7947 return NULL_RTX;
7948 if (XEXP (x, 0) != pic_offset_table_rtx)
7949 return NULL_RTX;
7950 x = XEXP (x, 1);
7951 if (GET_CODE (x) != CONST)
7952 return NULL_RTX;
7953 x = XEXP (x, 0);
7954 if (GET_CODE (x) != UNSPEC)
7955 return NULL_RTX;
7956 if (XINT (x, 1) != UNSPEC_GOTOFF)
7957 return NULL_RTX;
7958 x = XVECEXP (x, 0, 0);
7961 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7962 return get_pool_constant (x);
7964 return NULL_RTX;
7967 void
7968 ix86_expand_move (mode, operands)
7969 enum machine_mode mode;
7970 rtx operands[];
7972 int strict = (reload_in_progress || reload_completed);
7973 rtx insn, op0, op1, tmp;
7975 op0 = operands[0];
7976 op1 = operands[1];
7978 if (tls_symbolic_operand (op1, Pmode))
7980 op1 = legitimize_address (op1, op1, VOIDmode);
7981 if (GET_CODE (op0) == MEM)
7983 tmp = gen_reg_rtx (mode);
7984 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7985 op1 = tmp;
7988 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7990 #if TARGET_MACHO
7991 if (MACHOPIC_PURE)
7993 rtx temp = ((reload_in_progress
7994 || ((op0 && GET_CODE (op0) == REG)
7995 && mode == Pmode))
7996 ? op0 : gen_reg_rtx (Pmode));
7997 op1 = machopic_indirect_data_reference (op1, temp);
7998 op1 = machopic_legitimize_pic_address (op1, mode,
7999 temp == op1 ? 0 : temp);
8001 else
8003 if (MACHOPIC_INDIRECT)
8004 op1 = machopic_indirect_data_reference (op1, 0);
8006 if (op0 != op1)
8008 insn = gen_rtx_SET (VOIDmode, op0, op1);
8009 emit_insn (insn);
8011 return;
8012 #endif /* TARGET_MACHO */
8013 if (GET_CODE (op0) == MEM)
8014 op1 = force_reg (Pmode, op1);
8015 else
8017 rtx temp = op0;
8018 if (GET_CODE (temp) != REG)
8019 temp = gen_reg_rtx (Pmode);
8020 temp = legitimize_pic_address (op1, temp);
8021 if (temp == op0)
8022 return;
8023 op1 = temp;
8026 else
8028 if (GET_CODE (op0) == MEM
8029 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8030 || !push_operand (op0, mode))
8031 && GET_CODE (op1) == MEM)
8032 op1 = force_reg (mode, op1);
8034 if (push_operand (op0, mode)
8035 && ! general_no_elim_operand (op1, mode))
8036 op1 = copy_to_mode_reg (mode, op1);
8038 /* Force large constants in 64-bit compilation into a register
8039 to get them CSEd. */
8040 if (TARGET_64BIT && mode == DImode
8041 && immediate_operand (op1, mode)
8042 && !x86_64_zero_extended_value (op1)
8043 && !register_operand (op0, mode)
8044 && optimize && !reload_completed && !reload_in_progress)
8045 op1 = copy_to_mode_reg (mode, op1);
8047 if (FLOAT_MODE_P (mode))
8049 /* If we are loading a floating point constant to a register,
8050 force the value to memory now, since we'll get better code
8051 out of the back end. */
8053 if (strict)
8055 else if (GET_CODE (op1) == CONST_DOUBLE)
8057 op1 = validize_mem (force_const_mem (mode, op1));
8058 if (!register_operand (op0, mode))
8060 rtx temp = gen_reg_rtx (mode);
8061 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8062 emit_move_insn (op0, temp);
8063 return;
8069 insn = gen_rtx_SET (VOIDmode, op0, op1);
8071 emit_insn (insn);
8074 void
8075 ix86_expand_vector_move (mode, operands)
8076 enum machine_mode mode;
8077 rtx operands[];
8079 /* Force constants other than zero into memory. We do not know how
8080 the instructions used to build constants modify the upper 64 bits
8081 of the register; once we have that information we may be able
8082 to handle some of them more efficiently. */
8083 if ((reload_in_progress | reload_completed) == 0
8084 && register_operand (operands[0], mode)
8085 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8087 operands[1] = force_const_mem (mode, operands[1]);
8088 emit_move_insn (operands[0], operands[1]);
8089 return;
8092 /* Make operand1 a register if it isn't already. */
8093 if (!no_new_pseudos
8094 && !register_operand (operands[0], mode)
8095 && !register_operand (operands[1], mode))
8097 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8098 emit_move_insn (operands[0], temp);
8099 return;
8102 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8105 /* Attempt to expand a binary operator. Make the expansion closer to the
8106 actual machine than just general_operand, which would allow 3 separate
8107 memory references (one output, two inputs) in a single insn. */
8109 void
8110 ix86_expand_binary_operator (code, mode, operands)
8111 enum rtx_code code;
8112 enum machine_mode mode;
8113 rtx operands[];
8115 int matching_memory;
8116 rtx src1, src2, dst, op, clob;
8118 dst = operands[0];
8119 src1 = operands[1];
8120 src2 = operands[2];
8122 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8123 if (GET_RTX_CLASS (code) == 'c'
8124 && (rtx_equal_p (dst, src2)
8125 || immediate_operand (src1, mode)))
8127 rtx temp = src1;
8128 src1 = src2;
8129 src2 = temp;
8132 /* If the destination is memory, and we do not have matching source
8133 operands, do things in registers. */
8134 matching_memory = 0;
8135 if (GET_CODE (dst) == MEM)
8137 if (rtx_equal_p (dst, src1))
8138 matching_memory = 1;
8139 else if (GET_RTX_CLASS (code) == 'c'
8140 && rtx_equal_p (dst, src2))
8141 matching_memory = 2;
8142 else
8143 dst = gen_reg_rtx (mode);
8146 /* Both source operands cannot be in memory. */
8147 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8149 if (matching_memory != 2)
8150 src2 = force_reg (mode, src2);
8151 else
8152 src1 = force_reg (mode, src1);
8155 /* If the operation is not commutative, source 1 cannot be a constant
8156 or non-matching memory. */
8157 if ((CONSTANT_P (src1)
8158 || (!matching_memory && GET_CODE (src1) == MEM))
8159 && GET_RTX_CLASS (code) != 'c')
8160 src1 = force_reg (mode, src1);
8162 /* If optimizing, copy to regs to improve CSE */
8163 if (optimize && ! no_new_pseudos)
8165 if (GET_CODE (dst) == MEM)
8166 dst = gen_reg_rtx (mode);
8167 if (GET_CODE (src1) == MEM)
8168 src1 = force_reg (mode, src1);
8169 if (GET_CODE (src2) == MEM)
8170 src2 = force_reg (mode, src2);
8173 /* Emit the instruction. */
8175 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8176 if (reload_in_progress)
8178 /* Reload doesn't know about the flags register, and doesn't know that
8179 it doesn't want to clobber it. We can only do this with PLUS. */
8180 if (code != PLUS)
8181 abort ();
8182 emit_insn (op);
8184 else
8186 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8187 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8190 /* Fix up the destination if needed. */
8191 if (dst != operands[0])
8192 emit_move_insn (operands[0], dst);
8195 /* Return TRUE or FALSE depending on whether the binary operator meets the
8196 appropriate constraints. */
8199 ix86_binary_operator_ok (code, mode, operands)
8200 enum rtx_code code;
8201 enum machine_mode mode ATTRIBUTE_UNUSED;
8202 rtx operands[3];
8204 /* Both source operands cannot be in memory. */
8205 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8206 return 0;
8207 /* If the operation is not commutative, source 1 cannot be a constant. */
8208 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8209 return 0;
8210 /* If the destination is memory, we must have a matching source operand. */
8211 if (GET_CODE (operands[0]) == MEM
8212 && ! (rtx_equal_p (operands[0], operands[1])
8213 || (GET_RTX_CLASS (code) == 'c'
8214 && rtx_equal_p (operands[0], operands[2]))))
8215 return 0;
8216 /* If the operation is not commutative and source 1 is memory, we must
8217 have a matching destination. */
8218 if (GET_CODE (operands[1]) == MEM
8219 && GET_RTX_CLASS (code) != 'c'
8220 && ! rtx_equal_p (operands[0], operands[1]))
8221 return 0;
8222 return 1;
8225 /* Attempt to expand a unary operator. Make the expansion closer to the
8226 actual machine than just general_operand, which would allow 2 separate
8227 memory references (one output, one input) in a single insn. */
8229 void
8230 ix86_expand_unary_operator (code, mode, operands)
8231 enum rtx_code code;
8232 enum machine_mode mode;
8233 rtx operands[];
8235 int matching_memory;
8236 rtx src, dst, op, clob;
8238 dst = operands[0];
8239 src = operands[1];
8241 /* If the destination is memory, and we do not have matching source
8242 operands, do things in registers. */
8243 matching_memory = 0;
8244 if (GET_CODE (dst) == MEM)
8246 if (rtx_equal_p (dst, src))
8247 matching_memory = 1;
8248 else
8249 dst = gen_reg_rtx (mode);
8252 /* When source operand is memory, destination must match. */
8253 if (!matching_memory && GET_CODE (src) == MEM)
8254 src = force_reg (mode, src);
8256 /* If optimizing, copy to regs to improve CSE */
8257 if (optimize && ! no_new_pseudos)
8259 if (GET_CODE (dst) == MEM)
8260 dst = gen_reg_rtx (mode);
8261 if (GET_CODE (src) == MEM)
8262 src = force_reg (mode, src);
8265 /* Emit the instruction. */
8267 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8268 if (reload_in_progress || code == NOT)
8270 /* Reload doesn't know about the flags register, and doesn't know that
8271 it doesn't want to clobber it. */
8272 if (code != NOT)
8273 abort ();
8274 emit_insn (op);
8276 else
8278 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8279 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8282 /* Fix up the destination if needed. */
8283 if (dst != operands[0])
8284 emit_move_insn (operands[0], dst);
8287 /* Return TRUE or FALSE depending on whether the unary operator meets the
8288 appropriate constraints. */
8291 ix86_unary_operator_ok (code, mode, operands)
8292 enum rtx_code code ATTRIBUTE_UNUSED;
8293 enum machine_mode mode ATTRIBUTE_UNUSED;
8294 rtx operands[2] ATTRIBUTE_UNUSED;
8296 /* If one of the operands is memory, source and destination must match. */
8297 if ((GET_CODE (operands[0]) == MEM
8298 || GET_CODE (operands[1]) == MEM)
8299 && ! rtx_equal_p (operands[0], operands[1]))
8300 return FALSE;
8301 return TRUE;
8304 /* Return TRUE or FALSE depending on whether the first SET in INSN
8305 has source and destination with matching CC modes and whether the
8306 CC mode is at least as constrained as REQ_MODE. */
8309 ix86_match_ccmode (insn, req_mode)
8310 rtx insn;
8311 enum machine_mode req_mode;
8313 rtx set;
8314 enum machine_mode set_mode;
8316 set = PATTERN (insn);
8317 if (GET_CODE (set) == PARALLEL)
8318 set = XVECEXP (set, 0, 0);
8319 if (GET_CODE (set) != SET)
8320 abort ();
8321 if (GET_CODE (SET_SRC (set)) != COMPARE)
8322 abort ();
8324 set_mode = GET_MODE (SET_DEST (set));
8325 switch (set_mode)
8327 case CCNOmode:
8328 if (req_mode != CCNOmode
8329 && (req_mode != CCmode
8330 || XEXP (SET_SRC (set), 1) != const0_rtx))
8331 return 0;
8332 break;
8333 case CCmode:
8334 if (req_mode == CCGCmode)
8335 return 0;
8336 /* FALLTHRU */
8337 case CCGCmode:
8338 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8339 return 0;
8340 /* FALLTHRU */
8341 case CCGOCmode:
8342 if (req_mode == CCZmode)
8343 return 0;
8344 /* FALLTHRU */
8345 case CCZmode:
8346 break;
8348 default:
8349 abort ();
8352 return (GET_MODE (SET_SRC (set)) == set_mode);
8355 /* Generate insn patterns to do an integer compare of OPERANDS. */
8357 static rtx
8358 ix86_expand_int_compare (code, op0, op1)
8359 enum rtx_code code;
8360 rtx op0, op1;
8362 enum machine_mode cmpmode;
8363 rtx tmp, flags;
8365 cmpmode = SELECT_CC_MODE (code, op0, op1);
8366 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8368 /* This is very simple, but making the interface the same as in the
8369 FP case makes the rest of the code easier. */
8370 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8371 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8373 /* Return the test that should be put into the flags user, i.e.
8374 the bcc, scc, or cmov instruction. */
8375 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
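/* For example (illustrative only), comparing two SImode registers for
   equality yields

       (set (reg:CCZ 17) (compare:CCZ (reg:SI a) (reg:SI b)))

   and the returned test is (eq (reg:CCZ 17) (const_int 0)), ready to
   be placed inside the bcc, scc, or cmov pattern. */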
8378 /* Figure out whether to use ordered or unordered fp comparisons.
8379 Return the appropriate mode to use. */
8381 enum machine_mode
8382 ix86_fp_compare_mode (code)
8383 enum rtx_code code ATTRIBUTE_UNUSED;
8385 /* ??? In order to make all comparisons reversible, we do all comparisons
8386 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8387 all forms of trapping and nontrapping comparisons, we can make inequality
8388 comparisons trapping again, since it results in better code when using
8389 FCOM based compares. */
8390 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8393 enum machine_mode
8394 ix86_cc_mode (code, op0, op1)
8395 enum rtx_code code;
8396 rtx op0, op1;
8398 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8399 return ix86_fp_compare_mode (code);
8400 switch (code)
8402 /* Only zero flag is needed. */
8403 case EQ: /* ZF=0 */
8404 case NE: /* ZF!=0 */
8405 return CCZmode;
8406 /* Codes needing carry flag. */
8407 case GEU: /* CF=0 */
8408 case GTU: /* CF=0 & ZF=0 */
8409 case LTU: /* CF=1 */
8410 case LEU: /* CF=1 | ZF=1 */
8411 return CCmode;
8412 /* Codes possibly doable only with sign flag when
8413 comparing against zero. */
8414 case GE: /* SF=OF or SF=0 */
8415 case LT: /* SF<>OF or SF=1 */
8416 if (op1 == const0_rtx)
8417 return CCGOCmode;
8418 else
8419 /* For other cases Carry flag is not required. */
8420 return CCGCmode;
8421 /* Codes doable only with the sign flag when comparing
8422 against zero, but we lack a jump instruction for them,
8423 so we need to use relational tests against overflow,
8424 which thus needs to be zero. */
8425 case GT: /* ZF=0 & SF=OF */
8426 case LE: /* ZF=1 | SF<>OF */
8427 if (op1 == const0_rtx)
8428 return CCNOmode;
8429 else
8430 return CCGCmode;
8431 /* The strcmp pattern does (use flags), and combine may ask us for the
8432 proper mode. */
8433 case USE:
8434 return CCmode;
8435 default:
8436 abort ();
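/* Worked examples of the selection above: (gtu x y) needs only the
   carry and zero flags, so plain CCmode suffices; (ge x 0) can be
   decided from the sign flag alone, giving CCGOCmode; and (gt x y)
   with nonzero y needs the full signed flags, hence CCGCmode. */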
8440 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8443 ix86_use_fcomi_compare (code)
8444 enum rtx_code code ATTRIBUTE_UNUSED;
8446 enum rtx_code swapped_code = swap_condition (code);
8447 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8448 || (ix86_fp_comparison_cost (swapped_code)
8449 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8452 /* Swap, force into registers, or otherwise massage the two operands
8453 to a fp comparison. The operands are updated in place; the new
8454 comparison code is returned. */
8456 static enum rtx_code
8457 ix86_prepare_fp_compare_args (code, pop0, pop1)
8458 enum rtx_code code;
8459 rtx *pop0, *pop1;
8461 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8462 rtx op0 = *pop0, op1 = *pop1;
8463 enum machine_mode op_mode = GET_MODE (op0);
8464 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8466 /* All of the unordered compare instructions only work on registers.
8467 The same is true of the XFmode compare instructions. The same is
8468 true of the fcomi compare instructions. */
8470 if (!is_sse
8471 && (fpcmp_mode == CCFPUmode
8472 || op_mode == XFmode
8473 || op_mode == TFmode
8474 || ix86_use_fcomi_compare (code)))
8476 op0 = force_reg (op_mode, op0);
8477 op1 = force_reg (op_mode, op1);
8479 else
8481 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8482 things around if they appear profitable, otherwise force op0
8483 into a register. */
8485 if (standard_80387_constant_p (op0) == 0
8486 || (GET_CODE (op0) == MEM
8487 && ! (standard_80387_constant_p (op1) == 0
8488 || GET_CODE (op1) == MEM)))
8490 rtx tmp;
8491 tmp = op0, op0 = op1, op1 = tmp;
8492 code = swap_condition (code);
8495 if (GET_CODE (op0) != REG)
8496 op0 = force_reg (op_mode, op0);
8498 if (CONSTANT_P (op1))
8500 if (standard_80387_constant_p (op1))
8501 op1 = force_reg (op_mode, op1);
8502 else
8503 op1 = validize_mem (force_const_mem (op_mode, op1));
8507 /* Try to rearrange the comparison to make it cheaper. */
8508 if (ix86_fp_comparison_cost (code)
8509 > ix86_fp_comparison_cost (swap_condition (code))
8510 && (GET_CODE (op1) == REG || !no_new_pseudos))
8512 rtx tmp;
8513 tmp = op0, op0 = op1, op1 = tmp;
8514 code = swap_condition (code);
8515 if (GET_CODE (op0) != REG)
8516 op0 = force_reg (op_mode, op0);
8519 *pop0 = op0;
8520 *pop1 = op1;
8521 return code;
8524 /* Convert the comparison codes we use to represent an FP comparison to the
8525 integer code that will result in a proper branch. Return UNKNOWN if no
8526 such code is available. */
8527 static enum rtx_code
8528 ix86_fp_compare_code_to_integer (code)
8529 enum rtx_code code;
8531 switch (code)
8533 case GT:
8534 return GTU;
8535 case GE:
8536 return GEU;
8537 case ORDERED:
8538 case UNORDERED:
8539 return code;
8540 break;
8541 case UNEQ:
8542 return EQ;
8543 break;
8544 case UNLT:
8545 return LTU;
8546 break;
8547 case UNLE:
8548 return LEU;
8549 break;
8550 case LTGT:
8551 return NE;
8552 break;
8553 default:
8554 return UNKNOWN;
8558 /* Split comparison code CODE into comparisons we can do using branch
8559 instructions. BYPASS_CODE is the comparison code for the branch that
8560 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8561 is not required, its value is set to NIL.
8562 We never require more than two branches. */
8563 static void
8564 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8565 enum rtx_code code, *bypass_code, *first_code, *second_code;
8567 *first_code = code;
8568 *bypass_code = NIL;
8569 *second_code = NIL;
8571 /* The fcomi comparison sets flags as follows:
8573 cmp ZF PF CF
8574 > 0 0 0
8575 < 0 0 1
8576 = 1 0 0
8577 un 1 1 1 */
8579 switch (code)
8581 case GT: /* GTU - CF=0 & ZF=0 */
8582 case GE: /* GEU - CF=0 */
8583 case ORDERED: /* PF=0 */
8584 case UNORDERED: /* PF=1 */
8585 case UNEQ: /* EQ - ZF=1 */
8586 case UNLT: /* LTU - CF=1 */
8587 case UNLE: /* LEU - CF=1 | ZF=1 */
8588 case LTGT: /* EQ - ZF=0 */
8589 break;
8590 case LT: /* LTU - CF=1 - fails on unordered */
8591 *first_code = UNLT;
8592 *bypass_code = UNORDERED;
8593 break;
8594 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8595 *first_code = UNLE;
8596 *bypass_code = UNORDERED;
8597 break;
8598 case EQ: /* EQ - ZF=1 - fails on unordered */
8599 *first_code = UNEQ;
8600 *bypass_code = UNORDERED;
8601 break;
8602 case NE: /* NE - ZF=0 - fails on unordered */
8603 *first_code = LTGT;
8604 *second_code = UNORDERED;
8605 break;
8606 case UNGE: /* GEU - CF=0 - fails on unordered */
8607 *first_code = GE;
8608 *second_code = UNORDERED;
8609 break;
8610 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8611 *first_code = GT;
8612 *second_code = UNORDERED;
8613 break;
8614 default:
8615 abort ();
8617 if (!TARGET_IEEE_FP)
8619 *second_code = NIL;
8620 *bypass_code = NIL;
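/* Example of the splitting above: with IEEE math, NE fails on
   unordered operands, so it becomes first_code = LTGT plus
   second_code = UNORDERED - branch to the target if either test
   holds. LT instead gets bypass_code = UNORDERED, i.e. branch
   around the UNLT test when the operands are unordered. */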
8624 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8625 All following functions use the number of instructions as the cost metric.
8626 In the future this should be tweaked to compute bytes for optimize_size and
8627 take into account the performance of various instructions on various CPUs. */
8628 static int
8629 ix86_fp_comparison_arithmetics_cost (code)
8630 enum rtx_code code;
8632 if (!TARGET_IEEE_FP)
8633 return 4;
8634 /* The cost of code output by ix86_expand_fp_compare. */
8635 switch (code)
8637 case UNLE:
8638 case UNLT:
8639 case LTGT:
8640 case GT:
8641 case GE:
8642 case UNORDERED:
8643 case ORDERED:
8644 case UNEQ:
8645 return 4;
8646 break;
8647 case LT:
8648 case NE:
8649 case EQ:
8650 case UNGE:
8651 return 5;
8652 break;
8653 case LE:
8654 case UNGT:
8655 return 6;
8656 break;
8657 default:
8658 abort ();
8662 /* Return cost of comparison done using fcomi operation.
8663 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8664 static int
8665 ix86_fp_comparison_fcomi_cost (code)
8666 enum rtx_code code;
8668 enum rtx_code bypass_code, first_code, second_code;
8669 /* Return an arbitrarily high cost when the instruction is not supported - this
8670 prevents gcc from using it. */
8671 if (!TARGET_CMOVE)
8672 return 1024;
8673 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8674 return (bypass_code != NIL || second_code != NIL) + 2;
8677 /* Return cost of comparison done using sahf operation.
8678 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8679 static int
8680 ix86_fp_comparison_sahf_cost (code)
8681 enum rtx_code code;
8683 enum rtx_code bypass_code, first_code, second_code;
8684 /* Return an arbitrarily high cost when the instruction is not preferred - this
8685 keeps gcc from using it. */
8686 if (!TARGET_USE_SAHF && !optimize_size)
8687 return 1024;
8688 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8689 return (bypass_code != NIL || second_code != NIL) + 3;
8692 /* Compute cost of the comparison done using any method.
8693 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8694 static int
8695 ix86_fp_comparison_cost (code)
8696 enum rtx_code code;
8698 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8699 int min;
8701 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8702 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8704 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8705 if (min > sahf_cost)
8706 min = sahf_cost;
8707 if (min > fcomi_cost)
8708 min = fcomi_cost;
8709 return min;
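/* For instance, per the tables above, UNEQ with TARGET_IEEE_FP costs
   4 using arithmetic on AX but only 2 via fcomi (no extra branch
   needed), so the fcomi sequence is chosen when TARGET_CMOVE is
   available. */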
8712 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8714 static rtx
8715 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8716 enum rtx_code code;
8717 rtx op0, op1, scratch;
8718 rtx *second_test;
8719 rtx *bypass_test;
8721 enum machine_mode fpcmp_mode, intcmp_mode;
8722 rtx tmp, tmp2;
8723 int cost = ix86_fp_comparison_cost (code);
8724 enum rtx_code bypass_code, first_code, second_code;
8726 fpcmp_mode = ix86_fp_compare_mode (code);
8727 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8729 if (second_test)
8730 *second_test = NULL_RTX;
8731 if (bypass_test)
8732 *bypass_test = NULL_RTX;
8734 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8736 /* Do fcomi/sahf based test when profitable. */
8737 if ((bypass_code == NIL || bypass_test)
8738 && (second_code == NIL || second_test)
8739 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8741 if (TARGET_CMOVE)
8743 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8744 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8745 tmp);
8746 emit_insn (tmp);
8748 else
8750 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8751 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8752 if (!scratch)
8753 scratch = gen_reg_rtx (HImode);
8754 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8755 emit_insn (gen_x86_sahf_1 (scratch));
8758 /* The FP codes work out to act like unsigned. */
8759 intcmp_mode = fpcmp_mode;
8760 code = first_code;
8761 if (bypass_code != NIL)
8762 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8763 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8764 const0_rtx);
8765 if (second_code != NIL)
8766 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8767 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8768 const0_rtx);
8770 else
8772 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8773 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8774 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8775 if (!scratch)
8776 scratch = gen_reg_rtx (HImode);
8777 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8779 /* In the unordered case, we have to check C2 for NaN's, which
8780 doesn't happen to work out to anything nice combination-wise.
8781 So do some bit twiddling on the value we've got in AH to come
8782 up with an appropriate set of condition codes. */
8784 intcmp_mode = CCNOmode;
8785 switch (code)
8787 case GT:
8788 case UNGT:
8789 if (code == GT || !TARGET_IEEE_FP)
8791 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8792 code = EQ;
8794 else
8796 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8797 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8798 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8799 intcmp_mode = CCmode;
8800 code = GEU;
8802 break;
8803 case LT:
8804 case UNLT:
8805 if (code == LT && TARGET_IEEE_FP)
8807 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8808 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8809 intcmp_mode = CCmode;
8810 code = EQ;
8812 else
8814 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8815 code = NE;
8817 break;
8818 case GE:
8819 case UNGE:
8820 if (code == GE || !TARGET_IEEE_FP)
8822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8823 code = EQ;
8825 else
8827 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8828 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8829 GEN_INT (0x01)));
8830 code = NE;
8832 break;
8833 case LE:
8834 case UNLE:
8835 if (code == LE && TARGET_IEEE_FP)
8837 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8838 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8839 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8840 intcmp_mode = CCmode;
8841 code = LTU;
8843 else
8845 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8846 code = NE;
8848 break;
8849 case EQ:
8850 case UNEQ:
8851 if (code == EQ && TARGET_IEEE_FP)
8853 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8854 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8855 intcmp_mode = CCmode;
8856 code = EQ;
8858 else
8860 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8861 code = NE;
8862 break;
8864 break;
8865 case NE:
8866 case LTGT:
8867 if (code == NE && TARGET_IEEE_FP)
8869 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8870 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8871 GEN_INT (0x40)));
8872 code = NE;
8874 else
8876 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8877 code = EQ;
8879 break;
8881 case UNORDERED:
8882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8883 code = NE;
8884 break;
8885 case ORDERED:
8886 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8887 code = EQ;
8888 break;
8890 default:
8891 abort ();
8895 /* Return the test that should be put into the flags user, i.e.
8896 the bcc, scc, or cmov instruction. */
8897 return gen_rtx_fmt_ee (code, VOIDmode,
8898 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8899 const0_rtx);
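/* A sketch of the fnstsw path above (illustrative operands): after
   fnstsw the status word lands in AH, where C0 = 0x01, C2 = 0x04 and
   C3 = 0x40. E.g. for GE without IEEE math the sequence is roughly

       fcomp   ...
       fnstsw  %ax
       testb   $0x05, %ah      ; C0 | C2

   followed by je/sete from the returned (eq ...) test, with the
   masks chosen per comparison as in the switch above. */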
8903 ix86_expand_compare (code, second_test, bypass_test)
8904 enum rtx_code code;
8905 rtx *second_test, *bypass_test;
8907 rtx op0, op1, ret;
8908 op0 = ix86_compare_op0;
8909 op1 = ix86_compare_op1;
8911 if (second_test)
8912 *second_test = NULL_RTX;
8913 if (bypass_test)
8914 *bypass_test = NULL_RTX;
8916 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8917 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8918 second_test, bypass_test);
8919 else
8920 ret = ix86_expand_int_compare (code, op0, op1);
8922 return ret;
8925 /* Return true if CODE will result in a nontrivial jump sequence. */
8926 bool
8927 ix86_fp_jump_nontrivial_p (code)
8928 enum rtx_code code;
8930 enum rtx_code bypass_code, first_code, second_code;
8931 if (!TARGET_CMOVE)
8932 return true;
8933 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8934 return bypass_code != NIL || second_code != NIL;
8937 void
8938 ix86_expand_branch (code, label)
8939 enum rtx_code code;
8940 rtx label;
8942 rtx tmp;
8944 switch (GET_MODE (ix86_compare_op0))
8946 case QImode:
8947 case HImode:
8948 case SImode:
8949 simple:
8950 tmp = ix86_expand_compare (code, NULL, NULL);
8951 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8952 gen_rtx_LABEL_REF (VOIDmode, label),
8953 pc_rtx);
8954 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8955 return;
8957 case SFmode:
8958 case DFmode:
8959 case XFmode:
8960 case TFmode:
8962 rtvec vec;
8963 int use_fcomi;
8964 enum rtx_code bypass_code, first_code, second_code;
8966 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8967 &ix86_compare_op1);
8969 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8971 /* Check whether we will use the natural sequence with one jump. If
8972 so, we can expand the jump early. Otherwise delay expansion by
8973 creating a compound insn so as not to confuse the optimizers. */
8974 if (bypass_code == NIL && second_code == NIL
8975 && TARGET_CMOVE)
8977 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8978 gen_rtx_LABEL_REF (VOIDmode, label),
8979 pc_rtx, NULL_RTX);
8981 else
8983 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8984 ix86_compare_op0, ix86_compare_op1);
8985 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8986 gen_rtx_LABEL_REF (VOIDmode, label),
8987 pc_rtx);
8988 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8990 use_fcomi = ix86_use_fcomi_compare (code);
8991 vec = rtvec_alloc (3 + !use_fcomi);
8992 RTVEC_ELT (vec, 0) = tmp;
8993 RTVEC_ELT (vec, 1)
8994 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8995 RTVEC_ELT (vec, 2)
8996 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8997 if (! use_fcomi)
8998 RTVEC_ELT (vec, 3)
8999 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9001 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9003 return;
9006 case DImode:
9007 if (TARGET_64BIT)
9008 goto simple;
9009 /* Expand DImode branch into multiple compare+branch. */
9011 rtx lo[2], hi[2], label2;
9012 enum rtx_code code1, code2, code3;
9014 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9016 tmp = ix86_compare_op0;
9017 ix86_compare_op0 = ix86_compare_op1;
9018 ix86_compare_op1 = tmp;
9019 code = swap_condition (code);
9021 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9022 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9024 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9025 avoid two branches. This costs one extra insn, so disable when
9026 optimizing for size. */
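/* A sketch of the resulting sequence (hypothetical registers):

       xorl  hi1, hi0
       xorl  lo1, lo0
       orl   hi0, lo0
       je    label          ; or jne, per CODE

   i.e. one extra insn in place of a second compare-and-branch. */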
9028 if ((code == EQ || code == NE)
9029 && (!optimize_size
9030 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9032 rtx xor0, xor1;
9034 xor1 = hi[0];
9035 if (hi[1] != const0_rtx)
9036 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9037 NULL_RTX, 0, OPTAB_WIDEN);
9039 xor0 = lo[0];
9040 if (lo[1] != const0_rtx)
9041 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9042 NULL_RTX, 0, OPTAB_WIDEN);
9044 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9045 NULL_RTX, 0, OPTAB_WIDEN);
9047 ix86_compare_op0 = tmp;
9048 ix86_compare_op1 = const0_rtx;
9049 ix86_expand_branch (code, label);
9050 return;
9053 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9054 op1 is a constant and the low word is zero, then we can just
9055 examine the high word. */
9057 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9058 switch (code)
9060 case LT: case LTU: case GE: case GEU:
9061 ix86_compare_op0 = hi[0];
9062 ix86_compare_op1 = hi[1];
9063 ix86_expand_branch (code, label);
9064 return;
9065 default:
9066 break;
9069 /* Otherwise, we need two or three jumps. */
9071 label2 = gen_label_rtx ();
9073 code1 = code;
9074 code2 = swap_condition (code);
9075 code3 = unsigned_condition (code);
9077 switch (code)
9079 case LT: case GT: case LTU: case GTU:
9080 break;
9082 case LE: code1 = LT; code2 = GT; break;
9083 case GE: code1 = GT; code2 = LT; break;
9084 case LEU: code1 = LTU; code2 = GTU; break;
9085 case GEU: code1 = GTU; code2 = LTU; break;
9087 case EQ: code1 = NIL; code2 = NE; break;
9088 case NE: code2 = NIL; break;
9090 default:
9091 abort ();
9095 * a < b =>
9096 * if (hi(a) < hi(b)) goto true;
9097 * if (hi(a) > hi(b)) goto false;
9098 * if (lo(a) < lo(b)) goto true;
9099 * false:
9102 ix86_compare_op0 = hi[0];
9103 ix86_compare_op1 = hi[1];
9105 if (code1 != NIL)
9106 ix86_expand_branch (code1, label);
9107 if (code2 != NIL)
9108 ix86_expand_branch (code2, label2);
9110 ix86_compare_op0 = lo[0];
9111 ix86_compare_op1 = lo[1];
9112 ix86_expand_branch (code3, label);
9114 if (code2 != NIL)
9115 emit_label (label2);
9116 return;
9119 default:
9120 abort ();
9124 /* Split branch based on floating point condition. */
9125 void
9126 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9127 enum rtx_code code;
9128 rtx op1, op2, target1, target2, tmp;
9130 rtx second, bypass;
9131 rtx label = NULL_RTX;
9132 rtx condition;
9133 int bypass_probability = -1, second_probability = -1, probability = -1;
9134 rtx i;
9136 if (target2 != pc_rtx)
9138 rtx tmp = target2;
9139 code = reverse_condition_maybe_unordered (code);
9140 target2 = target1;
9141 target1 = tmp;
9144 condition = ix86_expand_fp_compare (code, op1, op2,
9145 tmp, &second, &bypass);
9147 if (split_branch_probability >= 0)
9149 /* Distribute the probabilities across the jumps.
9150 Assume BYPASS and SECOND always test
9151 for UNORDERED. */
9152 probability = split_branch_probability;
9154 /* A value of 1 is low enough that there is no need for the probability
9155 to be updated. Later we may run some experiments and see
9156 if unordered values are more frequent in practice. */
9157 if (bypass)
9158 bypass_probability = 1;
9159 if (second)
9160 second_probability = 1;
9162 if (bypass != NULL_RTX)
9164 label = gen_label_rtx ();
9165 i = emit_jump_insn (gen_rtx_SET
9166 (VOIDmode, pc_rtx,
9167 gen_rtx_IF_THEN_ELSE (VOIDmode,
9168 bypass,
9169 gen_rtx_LABEL_REF (VOIDmode,
9170 label),
9171 pc_rtx)));
9172 if (bypass_probability >= 0)
9173 REG_NOTES (i)
9174 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9175 GEN_INT (bypass_probability),
9176 REG_NOTES (i));
9178 i = emit_jump_insn (gen_rtx_SET
9179 (VOIDmode, pc_rtx,
9180 gen_rtx_IF_THEN_ELSE (VOIDmode,
9181 condition, target1, target2)));
9182 if (probability >= 0)
9183 REG_NOTES (i)
9184 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9185 GEN_INT (probability),
9186 REG_NOTES (i));
9187 if (second != NULL_RTX)
9189 i = emit_jump_insn (gen_rtx_SET
9190 (VOIDmode, pc_rtx,
9191 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9192 target2)));
9193 if (second_probability >= 0)
9194 REG_NOTES (i)
9195 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9196 GEN_INT (second_probability),
9197 REG_NOTES (i));
9199 if (label != NULL_RTX)
9200 emit_label (label);
9204 ix86_expand_setcc (code, dest)
9205 enum rtx_code code;
9206 rtx dest;
9208 rtx ret, tmp, tmpreg;
9209 rtx second_test, bypass_test;
9211 if (GET_MODE (ix86_compare_op0) == DImode
9212 && !TARGET_64BIT)
9213 return 0; /* FAIL */
9215 if (GET_MODE (dest) != QImode)
9216 abort ();
9218 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9219 PUT_MODE (ret, QImode);
9221 tmp = dest;
9222 tmpreg = dest;
9224 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9225 if (bypass_test || second_test)
9227 rtx test = second_test;
9228 int bypass = 0;
9229 rtx tmp2 = gen_reg_rtx (QImode);
9230 if (bypass_test)
9232 if (second_test)
9233 abort ();
9234 test = bypass_test;
9235 bypass = 1;
9236 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9238 PUT_MODE (test, QImode);
9239 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9241 if (bypass)
9242 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9243 else
9244 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9247 return 1; /* DONE */
9251 ix86_expand_int_movcc (operands)
9252 rtx operands[];
9254 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9255 rtx compare_seq, compare_op;
9256 rtx second_test, bypass_test;
9257 enum machine_mode mode = GET_MODE (operands[0]);
9259 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
9260 When the comparison is done with an immediate, we can convert it to LTU or
9261 GEU by altering the integer. */
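/* For example, an unsigned "x <= 4" becomes "x < 5": bumping the
   immediate turns LEU into LTU (and GTU into GEU), which the
   sbb-based sequences below handle directly. */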
9263 if ((code == LEU || code == GTU)
9264 && GET_CODE (ix86_compare_op1) == CONST_INT
9265 && mode != HImode
9266 && INTVAL (ix86_compare_op1) != -1
9267 /* For x86-64, the immediate field in the instruction is 32-bit
9268 signed, so we can't increment a DImode value above 0x7fffffff. */
9269 && (!TARGET_64BIT
9270 || GET_MODE (ix86_compare_op0) != DImode
9271 || INTVAL (ix86_compare_op1) != 0x7fffffff)
9272 && GET_CODE (operands[2]) == CONST_INT
9273 && GET_CODE (operands[3]) == CONST_INT)
9275 if (code == LEU)
9276 code = LTU;
9277 else
9278 code = GEU;
9279 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9280 GET_MODE (ix86_compare_op0));
9283 start_sequence ();
9284 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9285 compare_seq = get_insns ();
9286 end_sequence ();
9288 compare_code = GET_CODE (compare_op);
9290 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9291 HImode insns, we'd be swallowed in word prefix ops. */
9293 if (mode != HImode
9294 && (mode != DImode || TARGET_64BIT)
9295 && GET_CODE (operands[2]) == CONST_INT
9296 && GET_CODE (operands[3]) == CONST_INT)
9298 rtx out = operands[0];
9299 HOST_WIDE_INT ct = INTVAL (operands[2]);
9300 HOST_WIDE_INT cf = INTVAL (operands[3]);
9301 HOST_WIDE_INT diff;
9303 if ((compare_code == LTU || compare_code == GEU)
9304 && !second_test && !bypass_test)
9306 /* Detect overlap between destination and compare sources. */
9307 rtx tmp = out;
9309 /* To simplify rest of code, restrict to the GEU case. */
9310 if (compare_code == LTU)
9312 HOST_WIDE_INT tmp = ct;
9313 ct = cf;
9314 cf = tmp;
9315 compare_code = reverse_condition (compare_code);
9316 code = reverse_condition (code);
9318 diff = ct - cf;
9320 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9321 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9322 tmp = gen_reg_rtx (mode);
9324 emit_insn (compare_seq);
9325 if (mode == DImode)
9326 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9327 else
9328 emit_insn (gen_x86_movsicc_0_m1 (tmp));
9330 if (diff == 1)
9333 * cmpl op0,op1
9334 * sbbl dest,dest
9335 * [addl dest, ct]
9337 * Size 5 - 8.
9339 if (ct)
9340 tmp = expand_simple_binop (mode, PLUS,
9341 tmp, GEN_INT (ct),
9342 tmp, 1, OPTAB_DIRECT);
9344 else if (cf == -1)
9347 * cmpl op0,op1
9348 * sbbl dest,dest
9349 * orl $ct, dest
9351 * Size 8.
9353 tmp = expand_simple_binop (mode, IOR,
9354 tmp, GEN_INT (ct),
9355 tmp, 1, OPTAB_DIRECT);
9357 else if (diff == -1 && ct)
9360 * cmpl op0,op1
9361 * sbbl dest,dest
9362 * notl dest
9363 * [addl dest, cf]
9365 * Size 8 - 11.
9367 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9368 if (cf)
9369 tmp = expand_simple_binop (mode, PLUS,
9370 tmp, GEN_INT (cf),
9371 tmp, 1, OPTAB_DIRECT);
9373 else
9376 * cmpl op0,op1
9377 * sbbl dest,dest
9378 * [notl dest]
9379 * andl cf - ct, dest
9380 * [addl dest, ct]
9382 * Size 8 - 11.
9385 if (cf == 0)
9387 cf = ct;
9388 ct = 0;
9389 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9392 tmp = expand_simple_binop (mode, AND,
9393 tmp,
9394 gen_int_mode (cf - ct, mode),
9395 tmp, 1, OPTAB_DIRECT);
9396 if (ct)
9397 tmp = expand_simple_binop (mode, PLUS,
9398 tmp, GEN_INT (ct),
9399 tmp, 1, OPTAB_DIRECT);
9402 if (tmp != out)
9403 emit_move_insn (out, tmp);
9405 return 1; /* DONE */
9408 diff = ct - cf;
9409 if (diff < 0)
9411 HOST_WIDE_INT tmp;
9412 tmp = ct, ct = cf, cf = tmp;
9413 diff = -diff;
9414 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9416 /* We may be reversing an unordered compare to a normal compare, which
9417 is not valid in general (we may convert a non-trapping condition
9418 to a trapping one); however, on i386 we currently emit all
9419 comparisons unordered. */
9420 compare_code = reverse_condition_maybe_unordered (compare_code);
9421 code = reverse_condition_maybe_unordered (code);
9423 else
9425 compare_code = reverse_condition (compare_code);
9426 code = reverse_condition (code);
9430 compare_code = NIL;
9431 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9432 && GET_CODE (ix86_compare_op1) == CONST_INT)
9434 if (ix86_compare_op1 == const0_rtx
9435 && (code == LT || code == GE))
9436 compare_code = code;
9437 else if (ix86_compare_op1 == constm1_rtx)
9439 if (code == LE)
9440 compare_code = LT;
9441 else if (code == GT)
9442 compare_code = GE;
9446 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9447 if (compare_code != NIL
9448 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9449 && (cf == -1 || ct == -1))
9451 /* If the lea code below could be used, only optimize
9452 if it results in a 2-insn sequence. */
9454 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9455 || diff == 3 || diff == 5 || diff == 9)
9456 || (compare_code == LT && ct == -1)
9457 || (compare_code == GE && cf == -1))
9460 * notl op1 (if necessary)
9461 * sarl $31, op1
9462 * orl cf, op1
9464 if (ct != -1)
9466 cf = ct;
9467 ct = -1;
9468 code = reverse_condition (code);
9471 out = emit_store_flag (out, code, ix86_compare_op0,
9472 ix86_compare_op1, VOIDmode, 0, -1);
9474 out = expand_simple_binop (mode, IOR,
9475 out, GEN_INT (cf),
9476 out, 1, OPTAB_DIRECT);
9477 if (out != operands[0])
9478 emit_move_insn (operands[0], out);
9480 return 1; /* DONE */
9484 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9485 || diff == 3 || diff == 5 || diff == 9)
9486 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9489 * xorl dest,dest
9490 * cmpl op1,op2
9491 * setcc dest
9492 * lea cf(dest*(ct-cf)),dest
9494 * Size 14.
9496 * This also catches the degenerate setcc-only case.
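*
* E.g. for ct = 5, cf = 2 we get diff == 3, and the setcc result x
* in dest becomes 2 + x*3 via a single "lea 2(dest,dest,2), dest".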
9499 rtx tmp;
9500 int nops;
9502 out = emit_store_flag (out, code, ix86_compare_op0,
9503 ix86_compare_op1, VOIDmode, 0, 1);
9505 nops = 0;
9506 /* On x86_64 the lea instruction operates on Pmode, so we need
9507 the arithmetic done in the proper mode to match. */
9508 if (diff == 1)
9509 tmp = copy_rtx (out);
9510 else
9512 rtx out1;
9513 out1 = copy_rtx (out);
9514 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9515 nops++;
9516 if (diff & 1)
9518 tmp = gen_rtx_PLUS (mode, tmp, out1);
9519 nops++;
9522 if (cf != 0)
9524 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9525 nops++;
9527 if (tmp != out
9528 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9530 if (nops == 1)
9531 out = force_operand (tmp, copy_rtx (out));
9532 else
9533 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9535 if (out != operands[0])
9536 emit_move_insn (operands[0], copy_rtx (out));
9538 return 1; /* DONE */
9542 * General case: Jumpful:
9543 * xorl dest,dest cmpl op1, op2
9544 * cmpl op1, op2 movl ct, dest
9545 * setcc dest jcc 1f
9546 * decl dest movl cf, dest
9547 * andl (cf-ct),dest 1:
9548 * addl ct,dest
9550 * Size 20. Size 14.
9552 * This is reasonably steep, but branch mispredict costs are
9553 * high on modern cpus, so consider failing only if optimizing
9554 * for space.
9556 * %%% Parameterize branch_cost on the tuning architecture, then
9557 * use that. The 80386 couldn't care less about mispredicts.
9560 if (!optimize_size && !TARGET_CMOVE)
9562 if (cf == 0)
9564 cf = ct;
9565 ct = 0;
9566 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9567 /* We may be reversing an unordered compare to a normal compare,
9568 which is not valid in general (we may convert a non-trapping
9569 condition to a trapping one); however, on i386 we currently
9570 emit all comparisons unordered. */
9571 code = reverse_condition_maybe_unordered (code);
9572 else
9574 code = reverse_condition (code);
9575 if (compare_code != NIL)
9576 compare_code = reverse_condition (compare_code);
9580 if (compare_code != NIL)
9582 /* notl op1 (if needed)
9583 sarl $31, op1
9584 andl (cf-ct), op1
9585 addl ct, op1
9587 For x < 0 (resp. x <= -1) there will be no notl,
9588 so if possible swap the constants to get rid of the
9589 complement.
9590 True/false will be -1/0 while code below (store flag
9591 followed by decrement) is 0/-1, so the constants need
9592 to be exchanged once more. */
9594 if (compare_code == GE || !cf)
9596 code = reverse_condition (code);
9597 compare_code = LT;
9599 else
9601 HOST_WIDE_INT tmp = cf;
9602 cf = ct;
9603 ct = tmp;
9606 out = emit_store_flag (out, code, ix86_compare_op0,
9607 ix86_compare_op1, VOIDmode, 0, -1);
9609 else
9611 out = emit_store_flag (out, code, ix86_compare_op0,
9612 ix86_compare_op1, VOIDmode, 0, 1);
9614 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9615 out, 1, OPTAB_DIRECT);
9618 out = expand_simple_binop (mode, AND, out,
9619 gen_int_mode (cf - ct, mode),
9620 out, 1, OPTAB_DIRECT);
9621 if (ct)
9622 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9623 out, 1, OPTAB_DIRECT);
9624 if (out != operands[0])
9625 emit_move_insn (operands[0], out);
9627 return 1; /* DONE */
9631 if (!TARGET_CMOVE)
9633 /* Try a few more things with specific constants and a variable. */
9635 optab op;
9636 rtx var, orig_out, out, tmp;
9638 if (optimize_size)
9639 return 0; /* FAIL */
9641 /* If one of the two operands is an interesting constant, load a
9642 constant with the above and mask it in with a logical operation. */
9644 if (GET_CODE (operands[2]) == CONST_INT)
9646 var = operands[3];
9647 if (INTVAL (operands[2]) == 0)
9648 operands[3] = constm1_rtx, op = and_optab;
9649 else if (INTVAL (operands[2]) == -1)
9650 operands[3] = const0_rtx, op = ior_optab;
9651 else
9652 return 0; /* FAIL */
9654 else if (GET_CODE (operands[3]) == CONST_INT)
9656 var = operands[2];
9657 if (INTVAL (operands[3]) == 0)
9658 operands[2] = constm1_rtx, op = and_optab;
9659 else if (INTVAL (operands[3]) == -1)
9660 operands[2] = const0_rtx, op = ior_optab;
9661 else
9662 return 0; /* FAIL */
9664 else
9665 return 0; /* FAIL */
9667 orig_out = operands[0];
9668 tmp = gen_reg_rtx (mode);
9669 operands[0] = tmp;
9671 /* Recurse to get the constant loaded. */
9672 if (ix86_expand_int_movcc (operands) == 0)
9673 return 0; /* FAIL */
9675 /* Mask in the interesting variable. */
9676 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9677 OPTAB_WIDEN);
9678 if (out != orig_out)
9679 emit_move_insn (orig_out, out);
9681 return 1; /* DONE */
9685 * For comparison with above,
9687 * movl cf,dest
9688 * movl ct,tmp
9689 * cmpl op1,op2
9690 * cmovcc tmp,dest
9692 * Size 15.
9695 if (! nonimmediate_operand (operands[2], mode))
9696 operands[2] = force_reg (mode, operands[2]);
9697 if (! nonimmediate_operand (operands[3], mode))
9698 operands[3] = force_reg (mode, operands[3]);
9700 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9702 rtx tmp = gen_reg_rtx (mode);
9703 emit_move_insn (tmp, operands[3]);
9704 operands[3] = tmp;
9706 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9708 rtx tmp = gen_reg_rtx (mode);
9709 emit_move_insn (tmp, operands[2]);
9710 operands[2] = tmp;
9712 if (! register_operand (operands[2], VOIDmode)
9713 && ! register_operand (operands[3], VOIDmode))
9714 operands[2] = force_reg (mode, operands[2]);
9716 emit_insn (compare_seq);
9717 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9718 gen_rtx_IF_THEN_ELSE (mode,
9719 compare_op, operands[2],
9720 operands[3])));
9721 if (bypass_test)
9722 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9723 gen_rtx_IF_THEN_ELSE (mode,
9724 bypass_test,
9725 operands[3],
9726 operands[0])));
9727 if (second_test)
9728 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9729 gen_rtx_IF_THEN_ELSE (mode,
9730 second_test,
9731 operands[2],
9732 operands[0])));
9734 return 1; /* DONE */
9738 ix86_expand_fp_movcc (operands)
9739 rtx operands[];
9741 enum rtx_code code;
9742 rtx tmp;
9743 rtx compare_op, second_test, bypass_test;
9745 /* For SF/DFmode conditional moves based on comparisons
9746 in the same mode, we may want to use SSE min/max instructions. */
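/* For example, "x = (a < b) ? a : b" on SFmode SSE operands maps
   directly onto minss (and the GT form onto maxss); the checks below
   emit these via gen_minsf3 / gen_maxsf3. */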
9747 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9748 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9749 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9750 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9751 && (!TARGET_IEEE_FP
9752 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9753 /* We may be called from the post-reload splitter. */
9754 && (!REG_P (operands[0])
9755 || SSE_REG_P (operands[0])
9756 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9758 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9759 code = GET_CODE (operands[1]);
9761 /* See if we have a (cross) match between the comparison operands and
9762 the conditional move operands. */
9763 if (rtx_equal_p (operands[2], op1))
9765 rtx tmp = op0;
9766 op0 = op1;
9767 op1 = tmp;
9768 code = reverse_condition_maybe_unordered (code);
9770 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9772 /* Check for min operation. */
9773 if (code == LT)
9775 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9776 if (memory_operand (op0, VOIDmode))
9777 op0 = force_reg (GET_MODE (operands[0]), op0);
9778 if (GET_MODE (operands[0]) == SFmode)
9779 emit_insn (gen_minsf3 (operands[0], op0, op1));
9780 else
9781 emit_insn (gen_mindf3 (operands[0], op0, op1));
9782 return 1;
9784 /* Check for max operation. */
9785 if (code == GT)
9787 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9788 if (memory_operand (op0, VOIDmode))
9789 op0 = force_reg (GET_MODE (operands[0]), op0);
9790 if (GET_MODE (operands[0]) == SFmode)
9791 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9792 else
9793 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9794 return 1;
9797 /* Massage the condition to be an sse_comparison_operator. In
9798 non-IEEE mode, try to canonicalize the destination operand
9799 to be first in the comparison - this helps reload to avoid extra
9800 moves. */
9801 if (!sse_comparison_operator (operands[1], VOIDmode)
9802 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9804 rtx tmp = ix86_compare_op0;
9805 ix86_compare_op0 = ix86_compare_op1;
9806 ix86_compare_op1 = tmp;
9807 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9808 VOIDmode, ix86_compare_op0,
9809 ix86_compare_op1);
9811 /* Similarly, try to arrange for the result to be the first operand of the
9812 conditional move. We also don't support the NE comparison on SSE, so try
9813 to avoid it. */
9814 if ((rtx_equal_p (operands[0], operands[3])
9815 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9816 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9818 rtx tmp = operands[2];
9819 operands[2] = operands[3];
9820 operands[3] = tmp;
9821 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9822 (GET_CODE (operands[1])),
9823 VOIDmode, ix86_compare_op0,
9824 ix86_compare_op1);
9826 if (GET_MODE (operands[0]) == SFmode)
9827 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9828 operands[2], operands[3],
9829 ix86_compare_op0, ix86_compare_op1));
9830 else
9831 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9832 operands[2], operands[3],
9833 ix86_compare_op0, ix86_compare_op1));
9834 return 1;
9837 /* The floating point conditional move instructions don't directly
9838 support conditions resulting from a signed integer comparison. */
9840 code = GET_CODE (operands[1]);
9841 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9843 /* The floating point conditional move instructions don't directly
9844 support signed integer comparisons. */
9846 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9848 if (second_test != NULL || bypass_test != NULL)
9849 abort ();
9850 tmp = gen_reg_rtx (QImode);
9851 ix86_expand_setcc (code, tmp);
9852 code = NE;
9853 ix86_compare_op0 = tmp;
9854 ix86_compare_op1 = const0_rtx;
9855 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9857 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9859 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9860 emit_move_insn (tmp, operands[3]);
9861 operands[3] = tmp;
9863 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9865 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9866 emit_move_insn (tmp, operands[2]);
9867 operands[2] = tmp;
9870 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9871 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9872 compare_op,
9873 operands[2],
9874 operands[3])));
9875 if (bypass_test)
9876 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9877 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9878 bypass_test,
9879 operands[3],
9880 operands[0])));
9881 if (second_test)
9882 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9883 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9884 second_test,
9885 operands[2],
9886 operands[0])));
9888 return 1;
9891 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9892 works for floating point parameters and non-offsettable memories.
9893 For pushes, it returns just stack offsets; the values will be saved
9894 in the right order. At most three parts are generated. */
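/* For example: on ia32 a DFmode value yields two SImode parts and an
   XFmode/TFmode value three, while in 64-bit mode an XFmode value
   yields a DImode part plus an SImode part, as computed below. */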
9896 static int
9897 ix86_split_to_parts (operand, parts, mode)
9898 rtx operand;
9899 rtx *parts;
9900 enum machine_mode mode;
9902 int size;
9904 if (!TARGET_64BIT)
9905 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9906 else
9907 size = (GET_MODE_SIZE (mode) + 4) / 8;
9909 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9910 abort ();
9911 if (size < 2 || size > 3)
9912 abort ();
9914 /* Optimize constant pool references into immediates. This is used by fp
9915 moves, which force all constants to memory to allow combining. */
9916 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9918 rtx tmp = maybe_get_pool_constant (operand);
9919 if (tmp)
9920 operand = tmp;
9923 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9925 /* The only non-offsettable memories we handle are pushes. */
9926 if (! push_operand (operand, VOIDmode))
9927 abort ();
9929 operand = copy_rtx (operand);
9930 PUT_MODE (operand, Pmode);
9931 parts[0] = parts[1] = parts[2] = operand;
9933 else if (!TARGET_64BIT)
9935 if (mode == DImode)
9936 split_di (&operand, 1, &parts[0], &parts[1]);
9937 else
9939 if (REG_P (operand))
9941 if (!reload_completed)
9942 abort ();
9943 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9944 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9945 if (size == 3)
9946 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9948 else if (offsettable_memref_p (operand))
9950 operand = adjust_address (operand, SImode, 0);
9951 parts[0] = operand;
9952 parts[1] = adjust_address (operand, SImode, 4);
9953 if (size == 3)
9954 parts[2] = adjust_address (operand, SImode, 8);
9956 else if (GET_CODE (operand) == CONST_DOUBLE)
9958 REAL_VALUE_TYPE r;
9959 long l[4];
9961 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9962 switch (mode)
9964 case XFmode:
9965 case TFmode:
9966 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9967 parts[2] = gen_int_mode (l[2], SImode);
9968 break;
9969 case DFmode:
9970 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9971 break;
9972 default:
9973 abort ();
9975 parts[1] = gen_int_mode (l[1], SImode);
9976 parts[0] = gen_int_mode (l[0], SImode);
9978 else
9979 abort ();
9982 else
9984 if (mode == TImode)
9985 split_ti (&operand, 1, &parts[0], &parts[1]);
9986 if (mode == XFmode || mode == TFmode)
9988 if (REG_P (operand))
9990 if (!reload_completed)
9991 abort ();
9992 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9993 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9995 else if (offsettable_memref_p (operand))
9997 operand = adjust_address (operand, DImode, 0);
9998 parts[0] = operand;
9999 parts[1] = adjust_address (operand, SImode, 8);
10001 else if (GET_CODE (operand) == CONST_DOUBLE)
10003 REAL_VALUE_TYPE r;
10004 long l[3];
10006 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10007 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10008 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10009 if (HOST_BITS_PER_WIDE_INT >= 64)
10010 parts[0]
10011 = gen_int_mode
10012 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10013 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10014 DImode);
10015 else
10016 parts[0] = immed_double_const (l[0], l[1], DImode);
10017 parts[1] = gen_int_mode (l[2], SImode);
10019 else
10020 abort ();
10024 return size;
10027 /* Emit insns to perform a move or push of DI, DF, and XF values;
10028 all required insns are emitted here. Operands 2-4 contain the
10029 input values in the correct order; operands 5-7 contain the
10030 output values. */
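/* E.g. if the destination's low register is also the source's high
   register, the high part must be moved first; the ordering logic at
   the end of this function arranges operands 2-7 so the source is
   never clobbered before it is read. */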
10032 void
10033 ix86_split_long_move (operands)
10034 rtx operands[];
10036 rtx part[2][3];
10037 int nparts;
10038 int push = 0;
10039 int collisions = 0;
10040 enum machine_mode mode = GET_MODE (operands[0]);
10042 /* The DFmode expanders may ask us to move a double.
10043 For a 64-bit target this is a single move. By hiding that fact
10044 here we simplify the i386.md splitters. */
10045 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10047 /* Optimize constant pool references into immediates. This is used by
10048 fp moves, which force all constants to memory to allow combining. */
10050 if (GET_CODE (operands[1]) == MEM
10051 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10052 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10053 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10054 if (push_operand (operands[0], VOIDmode))
10056 operands[0] = copy_rtx (operands[0]);
10057 PUT_MODE (operands[0], Pmode);
10059 else
10060 operands[0] = gen_lowpart (DImode, operands[0]);
10061 operands[1] = gen_lowpart (DImode, operands[1]);
10062 emit_move_insn (operands[0], operands[1]);
10063 return;
10066 /* The only non-offsettable memory we handle is a push. */
10067 if (push_operand (operands[0], VOIDmode))
10068 push = 1;
10069 else if (GET_CODE (operands[0]) == MEM
10070 && ! offsettable_memref_p (operands[0]))
10071 abort ();
10073 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10074 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10076 /* When emitting a push, take care of source operands on the stack. */
10077 if (push && GET_CODE (operands[1]) == MEM
10078 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10080 if (nparts == 3)
10081 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10082 XEXP (part[1][2], 0));
10083 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10084 XEXP (part[1][1], 0));
10087 /* We need to do the copy in the right order in case an address register
10088 of the source overlaps the destination. */
10089 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10091 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10092 collisions++;
10093 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10094 collisions++;
10095 if (nparts == 3
10096 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10097 collisions++;
10099 /* Collision in the middle part can be handled by reordering. */
10100 if (collisions == 1 && nparts == 3
10101 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10103 rtx tmp;
10104 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10105 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10108 /* If there are more collisions, we can't handle it by reordering.
10109 Do an lea to the last part and use only one colliding move. */
10110 else if (collisions > 1)
10112 rtx base;
10114 collisions = 1;
10116 base = part[0][nparts - 1];
10118 /* Handle the case when the last part isn't valid for lea.
10119 Happens in 64-bit mode storing the 12-byte XFmode. */
10120 if (GET_MODE (base) != Pmode)
10121 base = gen_rtx_REG (Pmode, REGNO (base));
10123 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10124 part[1][0] = replace_equiv_address (part[1][0], base);
10125 part[1][1] = replace_equiv_address (part[1][1],
10126 plus_constant (base, UNITS_PER_WORD));
10127 if (nparts == 3)
10128 part[1][2] = replace_equiv_address (part[1][2],
10129 plus_constant (base, 8));
10133 if (push)
10135 if (!TARGET_64BIT)
10137 if (nparts == 3)
10139 /* We use only the first 12 bytes of the TFmode value, but for pushing
10140 we are required to adjust the stack as if we were pushing a real
10141 16-byte value. */
10142 if (mode == TFmode && !TARGET_64BIT)
10143 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10144 GEN_INT (-4)));
10145 emit_move_insn (part[0][2], part[1][2]);
10148 else
10150 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
10151 register, it is OK - we will just use the larger counterpart. We also
10152 retype memory - this comes from an attempt to avoid the REX prefix on
10153 moving the second half of a TFmode value. */
10154 if (GET_MODE (part[1][1]) == SImode)
10156 if (GET_CODE (part[1][1]) == MEM)
10157 part[1][1] = adjust_address (part[1][1], DImode, 0);
10158 else if (REG_P (part[1][1]))
10159 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10160 else
10161 abort ();
10162 if (GET_MODE (part[1][0]) == SImode)
10163 part[1][0] = part[1][1];
10166 emit_move_insn (part[0][1], part[1][1]);
10167 emit_move_insn (part[0][0], part[1][0]);
10168 return;
10171 /* Choose the correct order so as not to overwrite the source before it is copied. */
10172 if ((REG_P (part[0][0])
10173 && REG_P (part[1][1])
10174 && (REGNO (part[0][0]) == REGNO (part[1][1])
10175 || (nparts == 3
10176 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10177 || (collisions > 0
10178 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10180 if (nparts == 3)
10182 operands[2] = part[0][2];
10183 operands[3] = part[0][1];
10184 operands[4] = part[0][0];
10185 operands[5] = part[1][2];
10186 operands[6] = part[1][1];
10187 operands[7] = part[1][0];
10189 else
10191 operands[2] = part[0][1];
10192 operands[3] = part[0][0];
10193 operands[5] = part[1][1];
10194 operands[6] = part[1][0];
10197 else
10199 if (nparts == 3)
10201 operands[2] = part[0][0];
10202 operands[3] = part[0][1];
10203 operands[4] = part[0][2];
10204 operands[5] = part[1][0];
10205 operands[6] = part[1][1];
10206 operands[7] = part[1][2];
10208 else
10210 operands[2] = part[0][0];
10211 operands[3] = part[0][1];
10212 operands[5] = part[1][0];
10213 operands[6] = part[1][1];
10216 emit_move_insn (operands[2], operands[5]);
10217 emit_move_insn (operands[3], operands[6]);
10218 if (nparts == 3)
10219 emit_move_insn (operands[4], operands[7]);
10221 return;
10224 void
10225 ix86_split_ashldi (operands, scratch)
10226 rtx *operands, scratch;
10228 rtx low[2], high[2];
10229 int count;
10231 if (GET_CODE (operands[2]) == CONST_INT)
10233 split_di (operands, 2, low, high);
10234 count = INTVAL (operands[2]) & 63;
10236 if (count >= 32)
10238 emit_move_insn (high[0], low[1]);
10239 emit_move_insn (low[0], const0_rtx);
10241 if (count > 32)
10242 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10244 else
10246 if (!rtx_equal_p (operands[0], operands[1]))
10247 emit_move_insn (operands[0], operands[1]);
10248 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10249 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10252 else
10254 if (!rtx_equal_p (operands[0], operands[1]))
10255 emit_move_insn (operands[0], operands[1]);
10257 split_di (operands, 1, low, high);
10259 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10260 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10262 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10264 if (! no_new_pseudos)
10265 scratch = force_reg (SImode, const0_rtx);
10266 else
10267 emit_move_insn (scratch, const0_rtx);
10269 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10270 scratch));
10272 else
10273 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10277 void
10278 ix86_split_ashrdi (operands, scratch)
10279 rtx *operands, scratch;
10281 rtx low[2], high[2];
10282 int count;
10284 if (GET_CODE (operands[2]) == CONST_INT)
10286 split_di (operands, 2, low, high);
10287 count = INTVAL (operands[2]) & 63;
10289 if (count >= 32)
10291 emit_move_insn (low[0], high[1]);
10293 if (! reload_completed)
10294 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10295 else
10297 emit_move_insn (high[0], low[0]);
10298 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10301 if (count > 32)
10302 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10304 else
10306 if (!rtx_equal_p (operands[0], operands[1]))
10307 emit_move_insn (operands[0], operands[1]);
10308 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10309 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10312 else
10314 if (!rtx_equal_p (operands[0], operands[1]))
10315 emit_move_insn (operands[0], operands[1]);
10317 split_di (operands, 1, low, high);
10319 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10320 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10322 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10324 if (! no_new_pseudos)
10325 scratch = gen_reg_rtx (SImode);
10326 emit_move_insn (scratch, high[0]);
10327 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10328 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10329 scratch));
10331 else
10332 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10336 void
10337 ix86_split_lshrdi (operands, scratch)
10338 rtx *operands, scratch;
10340 rtx low[2], high[2];
10341 int count;
10343 if (GET_CODE (operands[2]) == CONST_INT)
10345 split_di (operands, 2, low, high);
10346 count = INTVAL (operands[2]) & 63;
10348 if (count >= 32)
10350 emit_move_insn (low[0], high[1]);
10351 emit_move_insn (high[0], const0_rtx);
10353 if (count > 32)
10354 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10356 else
10358 if (!rtx_equal_p (operands[0], operands[1]))
10359 emit_move_insn (operands[0], operands[1]);
10360 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10361 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10364 else
10366 if (!rtx_equal_p (operands[0], operands[1]))
10367 emit_move_insn (operands[0], operands[1]);
10369 split_di (operands, 1, low, high);
10371 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10372 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10374 /* Heh. By reversing the arguments, we can reuse this pattern. */
10375 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10377 if (! no_new_pseudos)
10378 scratch = force_reg (SImode, const0_rtx);
10379 else
10380 emit_move_insn (scratch, const0_rtx);
10382 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10383 scratch));
10385 else
10386 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10390 /* Helper function for the string operations below. Test whether VARIABLE
10391 is aligned to VALUE bytes; if so, emit a jump to the label that is returned. */
10392 static rtx
10393 ix86_expand_aligntest (variable, value)
10394 rtx variable;
10395 int value;
10397 rtx label = gen_label_rtx ();
10398 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10399 if (GET_MODE (variable) == DImode)
10400 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10401 else
10402 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10403 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10404 1, label);
10405 return label;
10408 /* Decrease COUNTREG by VALUE. */
10409 static void
10410 ix86_adjust_counter (countreg, value)
10411 rtx countreg;
10412 HOST_WIDE_INT value;
10414 if (GET_MODE (countreg) == DImode)
10415 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10416 else
10417 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10420 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
10421 rtx
10422 ix86_zero_extend_to_Pmode (exp)
10423 rtx exp;
10425 rtx r;
10426 if (GET_MODE (exp) == VOIDmode)
10427 return force_reg (Pmode, exp);
10428 if (GET_MODE (exp) == Pmode)
10429 return copy_to_mode_reg (Pmode, exp);
10430 r = gen_reg_rtx (Pmode);
10431 emit_insn (gen_zero_extendsidi2 (r, exp));
10432 return r;
10435 /* Expand a string move (memcpy) operation. Use i386 string operations when
10436 profitable. ix86_expand_clrstr contains similar code. */
10437 int
10438 ix86_expand_movstr (dst, src, count_exp, align_exp)
10439 rtx dst, src, count_exp, align_exp;
10441 rtx srcreg, destreg, countreg;
10442 enum machine_mode counter_mode;
10443 HOST_WIDE_INT align = 0;
10444 unsigned HOST_WIDE_INT count = 0;
10445 rtx insns;
10447 start_sequence ();
10449 if (GET_CODE (align_exp) == CONST_INT)
10450 align = INTVAL (align_exp);
10452 /* This simple hack avoids all inlining code and simplifies code below. */
10453 if (!TARGET_ALIGN_STRINGOPS)
10454 align = 64;
10456 if (GET_CODE (count_exp) == CONST_INT)
10457 count = INTVAL (count_exp);
10459 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
10460 for 64 bits use SImode when possible, otherwise DImode.
10461 Set count to the number of bytes copied when known at compile time. */
10462 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10463 || x86_64_zero_extended_value (count_exp))
10464 counter_mode = SImode;
10465 else
10466 counter_mode = DImode;
10468 if (counter_mode != SImode && counter_mode != DImode)
10469 abort ();
10471 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10472 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10474 emit_insn (gen_cld ());
10476 /* When optimizing for size, emit a simple rep ; movsb instruction for
10477 counts not divisible by 4. */
10479 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10481 countreg = ix86_zero_extend_to_Pmode (count_exp);
10482 if (TARGET_64BIT)
10483 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10484 destreg, srcreg, countreg));
10485 else
10486 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10487 destreg, srcreg, countreg));
10490 /* For constant aligned (or small unaligned) copies use rep movsl
10491 followed by code copying the rest. For the PentiumPro, ensure 8-byte
10492 alignment to allow rep movsl acceleration. */
10494 else if (count != 0
10495 && (align >= 8
10496 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10497 || optimize_size || count < (unsigned int) 64))
10499 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10500 if (count & ~(size - 1))
10502 countreg = copy_to_mode_reg (counter_mode,
10503 GEN_INT ((count >> (size == 4 ? 2 : 3))
10504 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10505 countreg = ix86_zero_extend_to_Pmode (countreg);
10506 if (size == 4)
10508 if (TARGET_64BIT)
10509 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10510 destreg, srcreg, countreg));
10511 else
10512 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10513 destreg, srcreg, countreg));
10515 else
10516 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10517 destreg, srcreg, countreg));
10519 if (size == 8 && (count & 0x04))
10520 emit_insn (gen_strmovsi (destreg, srcreg));
10521 if (count & 0x02)
10522 emit_insn (gen_strmovhi (destreg, srcreg));
10523 if (count & 0x01)
10524 emit_insn (gen_strmovqi (destreg, srcreg));
10526 /* The generic code based on the glibc implementation:
10527 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10528 allowing accelerated copying there)
10529 - copy the data using rep movsl
10530 - copy the rest. */
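/* As a rough C-level sketch (an illustration only -- what is actually
   emitted below is RTL built from the string-op insns), the generated
   code behaves like:

       while (count != 0 && ((long) dest & (desired_alignment - 1)))
         copy 1, 2 or 4 bytes and decrease count;
       rep movsl with (count >> 2) longwords (or rep movsq with
         (count >> 3) quadwords on 64-bit targets);
       copy the remaining (count & 3) bytes one piece at a time.  */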
10531 else
10533 rtx countreg2;
10534 rtx label = NULL;
10535 int desired_alignment = (TARGET_PENTIUMPRO
10536 && (count == 0 || count >= (unsigned int) 260)
10537 ? 8 : UNITS_PER_WORD);
10539 /* In case we don't know anything about the alignment, default to the
10540 library version, since it is usually equally fast and results in
10541 shorter code. */
10542 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10544 end_sequence ();
10545 return 0;
10548 if (TARGET_SINGLE_STRINGOP)
10549 emit_insn (gen_cld ());
10551 countreg2 = gen_reg_rtx (Pmode);
10552 countreg = copy_to_mode_reg (counter_mode, count_exp);
10554 /* We don't use loops to align the destination or to copy parts smaller
10555 than 4 bytes, because gcc is able to optimize such code better (in
10556 case the destination or the count really is aligned, gcc is often
10557 able to predict the branches) and also it is friendlier to
10558 hardware branch prediction.
10560 Using loops is beneficial for the generic case, because we can
10561 handle small counts using the loops. Many CPUs (such as the Athlon)
10562 have large REP prefix setup costs.
10564 This is quite costly. Maybe we can revisit this decision later or
10565 add some customizability to this code. */
10567 if (count == 0 && align < desired_alignment)
10569 label = gen_label_rtx ();
10570 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10571 LEU, 0, counter_mode, 1, label);
10573 if (align <= 1)
10575 rtx label = ix86_expand_aligntest (destreg, 1);
10576 emit_insn (gen_strmovqi (destreg, srcreg));
10577 ix86_adjust_counter (countreg, 1);
10578 emit_label (label);
10579 LABEL_NUSES (label) = 1;
10581 if (align <= 2)
10583 rtx label = ix86_expand_aligntest (destreg, 2);
10584 emit_insn (gen_strmovhi (destreg, srcreg));
10585 ix86_adjust_counter (countreg, 2);
10586 emit_label (label);
10587 LABEL_NUSES (label) = 1;
10589 if (align <= 4 && desired_alignment > 4)
10591 rtx label = ix86_expand_aligntest (destreg, 4);
10592 emit_insn (gen_strmovsi (destreg, srcreg));
10593 ix86_adjust_counter (countreg, 4);
10594 emit_label (label);
10595 LABEL_NUSES (label) = 1;
10598 if (label && desired_alignment > 4 && !TARGET_64BIT)
10600 emit_label (label);
10601 LABEL_NUSES (label) = 1;
10602 label = NULL_RTX;
10604 if (!TARGET_SINGLE_STRINGOP)
10605 emit_insn (gen_cld ());
10606 if (TARGET_64BIT)
10608 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10609 GEN_INT (3)));
10610 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10611 destreg, srcreg, countreg2));
10613 else
10615 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10616 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10617 destreg, srcreg, countreg2));
10620 if (label)
10622 emit_label (label);
10623 LABEL_NUSES (label) = 1;
10625 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10626 emit_insn (gen_strmovsi (destreg, srcreg));
10627 if ((align <= 4 || count == 0) && TARGET_64BIT)
10629 rtx label = ix86_expand_aligntest (countreg, 4);
10630 emit_insn (gen_strmovsi (destreg, srcreg));
10631 emit_label (label);
10632 LABEL_NUSES (label) = 1;
10634 if (align > 2 && count != 0 && (count & 2))
10635 emit_insn (gen_strmovhi (destreg, srcreg));
10636 if (align <= 2 || count == 0)
10638 rtx label = ix86_expand_aligntest (countreg, 2);
10639 emit_insn (gen_strmovhi (destreg, srcreg));
10640 emit_label (label);
10641 LABEL_NUSES (label) = 1;
10643 if (align > 1 && count != 0 && (count & 1))
10644 emit_insn (gen_strmovqi (destreg, srcreg));
10645 if (align <= 1 || count == 0)
10647 rtx label = ix86_expand_aligntest (countreg, 1);
10648 emit_insn (gen_strmovqi (destreg, srcreg));
10649 emit_label (label);
10650 LABEL_NUSES (label) = 1;
10654 insns = get_insns ();
10655 end_sequence ();
10657 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10658 emit_insn (insns);
10659 return 1;
10662 /* Expand a string clear operation (bzero). Use i386 string operations when
10663 profitable. ix86_expand_movstr contains similar code. */
10664 int
10665 ix86_expand_clrstr (src, count_exp, align_exp)
10666 rtx src, count_exp, align_exp;
10668 rtx destreg, zeroreg, countreg;
10669 enum machine_mode counter_mode;
10670 HOST_WIDE_INT align = 0;
10671 unsigned HOST_WIDE_INT count = 0;
10673 if (GET_CODE (align_exp) == CONST_INT)
10674 align = INTVAL (align_exp);
10676 /* This simple hack avoids all inlining code and simplifies code below. */
10677 if (!TARGET_ALIGN_STRINGOPS)
10678 align = 32;
10680 if (GET_CODE (count_exp) == CONST_INT)
10681 count = INTVAL (count_exp);
10682 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
10683 for 64 bits use SImode when possible, otherwise DImode.
10684 Set count to the number of bytes cleared when known at compile time. */
10685 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10686 || x86_64_zero_extended_value (count_exp))
10687 counter_mode = SImode;
10688 else
10689 counter_mode = DImode;
10691 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10693 emit_insn (gen_cld ());
10695 /* When optimizing for size, emit a simple rep ; stosb instruction for
10696 counts not divisible by 4. */
10698 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10700 countreg = ix86_zero_extend_to_Pmode (count_exp);
10701 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10702 if (TARGET_64BIT)
10703 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10704 destreg, countreg));
10705 else
10706 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10707 destreg, countreg));
10709 else if (count != 0
10710 && (align >= 8
10711 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10712 || optimize_size || count < (unsigned int) 64))
10714 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10715 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10716 if (count & ~(size - 1))
10718 countreg = copy_to_mode_reg (counter_mode,
10719 GEN_INT ((count >> (size == 4 ? 2 : 3))
10720 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10721 countreg = ix86_zero_extend_to_Pmode (countreg);
10722 if (size == 4)
10724 if (TARGET_64BIT)
10725 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10726 destreg, countreg));
10727 else
10728 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10729 destreg, countreg));
10731 else
10732 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10733 destreg, countreg));
10735 if (size == 8 && (count & 0x04))
10736 emit_insn (gen_strsetsi (destreg,
10737 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10738 if (count & 0x02)
10739 emit_insn (gen_strsethi (destreg,
10740 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10741 if (count & 0x01)
10742 emit_insn (gen_strsetqi (destreg,
10743 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10745 else
10747 rtx countreg2;
10748 rtx label = NULL;
10749 /* Compute desired alignment of the string operation. */
10750 int desired_alignment = (TARGET_PENTIUMPRO
10751 && (count == 0 || count >= (unsigned int) 260)
10752 ? 8 : UNITS_PER_WORD);
10754 /* In case we don't know anything about the alignment, default to the
10755 library version, since it is usually equally fast and results in
10756 shorter code. */
10757 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10758 return 0;
10760 if (TARGET_SINGLE_STRINGOP)
10761 emit_insn (gen_cld ());
10763 countreg2 = gen_reg_rtx (Pmode);
10764 countreg = copy_to_mode_reg (counter_mode, count_exp);
10765 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10767 if (count == 0 && align < desired_alignment)
10769 label = gen_label_rtx ();
10770 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10771 LEU, 0, counter_mode, 1, label);
10773 if (align <= 1)
10775 rtx label = ix86_expand_aligntest (destreg, 1);
10776 emit_insn (gen_strsetqi (destreg,
10777 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10778 ix86_adjust_counter (countreg, 1);
10779 emit_label (label);
10780 LABEL_NUSES (label) = 1;
10782 if (align <= 2)
10784 rtx label = ix86_expand_aligntest (destreg, 2);
10785 emit_insn (gen_strsethi (destreg,
10786 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10787 ix86_adjust_counter (countreg, 2);
10788 emit_label (label);
10789 LABEL_NUSES (label) = 1;
10791 if (align <= 4 && desired_alignment > 4)
10793 rtx label = ix86_expand_aligntest (destreg, 4);
10794 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10795 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10796 : zeroreg)));
10797 ix86_adjust_counter (countreg, 4);
10798 emit_label (label);
10799 LABEL_NUSES (label) = 1;
10802 if (label && desired_alignment > 4 && !TARGET_64BIT)
10804 emit_label (label);
10805 LABEL_NUSES (label) = 1;
10806 label = NULL_RTX;
10809 if (!TARGET_SINGLE_STRINGOP)
10810 emit_insn (gen_cld ());
10811 if (TARGET_64BIT)
10813 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10814 GEN_INT (3)));
10815 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10816 destreg, countreg2));
10818 else
10820 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10821 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10822 destreg, countreg2));
10824 if (label)
10826 emit_label (label);
10827 LABEL_NUSES (label) = 1;
10830 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10831 emit_insn (gen_strsetsi (destreg,
10832 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10833 if (TARGET_64BIT && (align <= 4 || count == 0))
10835 rtx label = ix86_expand_aligntest (countreg, 4);
10836 emit_insn (gen_strsetsi (destreg,
10837 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10838 emit_label (label);
10839 LABEL_NUSES (label) = 1;
10841 if (align > 2 && count != 0 && (count & 2))
10842 emit_insn (gen_strsethi (destreg,
10843 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10844 if (align <= 2 || count == 0)
10846 rtx label = ix86_expand_aligntest (countreg, 2);
10847 emit_insn (gen_strsethi (destreg,
10848 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10849 emit_label (label);
10850 LABEL_NUSES (label) = 1;
10852 if (align > 1 && count != 0 && (count & 1))
10853 emit_insn (gen_strsetqi (destreg,
10854 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10855 if (align <= 1 || count == 0)
10857 rtx label = ix86_expand_aligntest (countreg, 1);
10858 emit_insn (gen_strsetqi (destreg,
10859 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10860 emit_label (label);
10861 LABEL_NUSES (label) = 1;
10864 return 1;
10866 /* Expand strlen. */
10867 int
10868 ix86_expand_strlen (out, src, eoschar, align)
10869 rtx out, src, eoschar, align;
10871 rtx addr, scratch1, scratch2, scratch3, scratch4;
10873 /* The generic case of the strlen expander is long. Avoid expanding
10874 it unless TARGET_INLINE_ALL_STRINGOPS. */
10876 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10877 && !TARGET_INLINE_ALL_STRINGOPS
10878 && !optimize_size
10879 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10880 return 0;
10882 addr = force_reg (Pmode, XEXP (src, 0));
10883 scratch1 = gen_reg_rtx (Pmode);
10885 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10886 && !optimize_size)
10888 /* Well, it seems that some optimizer does not combine a call like
10889 foo(strlen(bar), strlen(bar));
10890 when the move and the subtraction are done here. It does calculate
10891 the length just once when these instructions are done inside of
10892 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
10893 often used, and I use one fewer register for the lifetime of
10894 output_strlen_unroll(), this is better. */
10896 emit_move_insn (out, addr);
10898 ix86_expand_strlensi_unroll_1 (out, align);
10900 /* strlensi_unroll_1 returns the address of the zero at the end of
10901 the string, like memchr(), so compute the length by subtracting
10902 the start address. */
10903 if (TARGET_64BIT)
10904 emit_insn (gen_subdi3 (out, out, addr));
10905 else
10906 emit_insn (gen_subsi3 (out, out, addr));
10908 else
10910 scratch2 = gen_reg_rtx (Pmode);
10911 scratch3 = gen_reg_rtx (Pmode);
10912 scratch4 = force_reg (Pmode, constm1_rtx);
10914 emit_move_insn (scratch3, addr);
10915 eoschar = force_reg (QImode, eoschar);
10917 emit_insn (gen_cld ());
10918 if (TARGET_64BIT)
10920 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10921 align, scratch4, scratch3));
10922 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10923 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10925 else
10927 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10928 align, scratch4, scratch3));
10929 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10930 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10933 return 1;
10936 /* Expand the appropriate insns for doing strlen if not just doing
10937 repnz; scasb
10939 out = result, initialized with the start address
10940 align_rtx = alignment of the address.
10941 scratch = scratch register, initialized with the start address when
10942 not aligned, otherwise undefined
10944 This is just the body. It needs the initializations mentioned above and
10945 some address computation at the end. These things are done in i386.md. */
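/* In outline (an illustrative sketch only, not the emitted RTL):

       while ((long) out & 3)            check up to 3 leading bytes
         if (*out == 0) goto done; else out++;
       do                                then scan a word at a time
         word = *(unsigned int *) out, out += 4;
       while (!((word - 0x01010101) & ~word & 0x80808080));
       step OUT back to the zero byte found inside WORD.  */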
10947 static void
10948 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10949 rtx out, align_rtx;
10951 int align;
10952 rtx tmp;
10953 rtx align_2_label = NULL_RTX;
10954 rtx align_3_label = NULL_RTX;
10955 rtx align_4_label = gen_label_rtx ();
10956 rtx end_0_label = gen_label_rtx ();
10957 rtx mem;
10958 rtx tmpreg = gen_reg_rtx (SImode);
10959 rtx scratch = gen_reg_rtx (SImode);
10961 align = 0;
10962 if (GET_CODE (align_rtx) == CONST_INT)
10963 align = INTVAL (align_rtx);
10965 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10967 /* Is there a known alignment and is it less than 4? */
10968 if (align < 4)
10970 rtx scratch1 = gen_reg_rtx (Pmode);
10971 emit_move_insn (scratch1, out);
10972 /* Is there a known alignment and is it not 2? */
10973 if (align != 2)
10975 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10976 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10978 /* Leave just the two lower bits. */
10979 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10980 NULL_RTX, 0, OPTAB_WIDEN);
10982 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10983 Pmode, 1, align_4_label);
10984 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10985 Pmode, 1, align_2_label);
10986 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10987 Pmode, 1, align_3_label);
10989 else
10991 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10992 check whether it is aligned to a 4-byte boundary. */
10994 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10995 NULL_RTX, 0, OPTAB_WIDEN);
10997 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10998 Pmode, 1, align_4_label);
11001 mem = gen_rtx_MEM (QImode, out);
11003 /* Now compare the bytes. */
11005 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11006 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11007 QImode, 1, end_0_label);
11009 /* Increment the address. */
11010 if (TARGET_64BIT)
11011 emit_insn (gen_adddi3 (out, out, const1_rtx));
11012 else
11013 emit_insn (gen_addsi3 (out, out, const1_rtx));
11015 /* Not needed with an alignment of 2 */
11016 if (align != 2)
11018 emit_label (align_2_label);
11020 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11021 end_0_label);
11023 if (TARGET_64BIT)
11024 emit_insn (gen_adddi3 (out, out, const1_rtx));
11025 else
11026 emit_insn (gen_addsi3 (out, out, const1_rtx));
11028 emit_label (align_3_label);
11031 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11032 end_0_label);
11034 if (TARGET_64BIT)
11035 emit_insn (gen_adddi3 (out, out, const1_rtx));
11036 else
11037 emit_insn (gen_addsi3 (out, out, const1_rtx));
11040 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11041 align this loop; that only makes programs larger and does not help to
11042 speed them up. */
11043 emit_label (align_4_label);
11045 mem = gen_rtx_MEM (SImode, out);
11046 emit_move_insn (scratch, mem);
11047 if (TARGET_64BIT)
11048 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11049 else
11050 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11052 /* This formula yields a nonzero result iff one of the bytes is zero.
11053 This saves three branches inside the loop and many cycles. */
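/* A worked example (values chosen for illustration): for
   scratch = 0x12340078, which contains a zero byte,
       (0x12340078 - 0x01010101) & ~0x12340078 & 0x80808080
     = 0x1132ff77 & 0xedcbff87 & 0x80808080
     = 0x00008000, which is nonzero, while for
   scratch = 0x7f7f7f7f, which has no zero byte,
       0x7e7e7e7e & 0x80808080 & 0x80808080 = 0.  */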
11055 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11056 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11057 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11058 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11059 gen_int_mode (0x80808080, SImode)));
11060 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11061 align_4_label);
11063 if (TARGET_CMOVE)
11065 rtx reg = gen_reg_rtx (SImode);
11066 rtx reg2 = gen_reg_rtx (Pmode);
11067 emit_move_insn (reg, tmpreg);
11068 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11070 /* If zero is not in the first two bytes, move two bytes forward. */
11071 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11072 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11073 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11074 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11075 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11076 reg,
11077 tmpreg)));
11078 /* Emit lea manually to avoid clobbering of flags. */
11079 emit_insn (gen_rtx_SET (SImode, reg2,
11080 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11082 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11083 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11084 emit_insn (gen_rtx_SET (VOIDmode, out,
11085 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11086 reg2,
11087 out)));
11090 else
11092 rtx end_2_label = gen_label_rtx ();
11093 /* Is zero in the first two bytes? */
11095 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11096 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11097 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11098 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11099 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11100 pc_rtx);
11101 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11102 JUMP_LABEL (tmp) = end_2_label;
11104 /* Not in the first two. Move two bytes forward. */
11105 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11106 if (TARGET_64BIT)
11107 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11108 else
11109 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11111 emit_label (end_2_label);
11115 /* Avoid branch in fixing the byte. */
11116 tmpreg = gen_lowpart (QImode, tmpreg);
11117 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11118 if (TARGET_64BIT)
11119 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11120 else
11121 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11123 emit_label (end_0_label);
11126 void
11127 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
11128 rtx retval, fnaddr, callarg1, callarg2, pop;
11130 rtx use = NULL, call;
11132 if (pop == const0_rtx)
11133 pop = NULL;
11134 if (TARGET_64BIT && pop)
11135 abort ();
11137 #if TARGET_MACHO
11138 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11139 fnaddr = machopic_indirect_call_target (fnaddr);
11140 #else
11141 /* Static functions and indirect calls don't need the pic register. */
11142 if (! TARGET_64BIT && flag_pic
11143 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11144 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11145 use_reg (&use, pic_offset_table_rtx);
11147 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11149 rtx al = gen_rtx_REG (QImode, 0);
11150 emit_move_insn (al, callarg2);
11151 use_reg (&use, al);
11153 #endif /* TARGET_MACHO */
11155 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11157 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11158 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11161 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11162 if (retval)
11163 call = gen_rtx_SET (VOIDmode, retval, call);
11164 if (pop)
11166 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11167 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11168 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11171 call = emit_call_insn (call);
11172 if (use)
11173 CALL_INSN_FUNCTION_USAGE (call) = use;
11177 /* Clear stack slot assignments remembered from previous functions.
11178 This is called from INIT_EXPANDERS once before RTL is emitted for each
11179 function. */
11181 static struct machine_function *
11182 ix86_init_machine_status ()
11184 return ggc_alloc_cleared (sizeof (struct machine_function));
11187 /* Return a MEM corresponding to a stack slot with mode MODE.
11188 Allocate a new slot if necessary.
11190 The RTL for a function can have several slots available: N is
11191 which slot to use. */
11193 rtx
11194 assign_386_stack_local (mode, n)
11195 enum machine_mode mode;
11196 int n;
11198 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11199 abort ();
11201 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11202 ix86_stack_locals[(int) mode][n]
11203 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11205 return ix86_stack_locals[(int) mode][n];
11208 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11210 static GTY(()) rtx ix86_tls_symbol;
11211 rtx
11212 ix86_tls_get_addr ()
11215 if (!ix86_tls_symbol)
11217 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11218 (TARGET_GNU_TLS && !TARGET_64BIT)
11219 ? "___tls_get_addr"
11220 : "__tls_get_addr");
11223 return ix86_tls_symbol;
11226 /* Calculate the length of the memory address in the instruction
11227 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11229 static int
11230 memory_address_length (addr)
11231 rtx addr;
11233 struct ix86_address parts;
11234 rtx base, index, disp;
11235 int len;
11237 if (GET_CODE (addr) == PRE_DEC
11238 || GET_CODE (addr) == POST_INC
11239 || GET_CODE (addr) == PRE_MODIFY
11240 || GET_CODE (addr) == POST_MODIFY)
11241 return 0;
11243 if (! ix86_decompose_address (addr, &parts))
11244 abort ();
11246 base = parts.base;
11247 index = parts.index;
11248 disp = parts.disp;
11249 len = 0;
11251 /* Rule of thumb:
11252 - esp as the base always wants an index,
11253 - ebp as the base always wants a displacement. */
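/* For example: "movl (%ebx), %eax" needs no extra address bytes,
   "movl (%esp), %eax" must be encoded with a one-byte SIB, and
   "movl (%ebp), %eax" must be encoded with a one-byte zero
   displacement; the LEN computations below account for exactly
   these extra bytes.  */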
11255 /* Register Indirect. */
11256 if (base && !index && !disp)
11258 /* esp (for its index) and ebp (for its displacement) need
11259 the two-byte modrm form. */
11260 if (addr == stack_pointer_rtx
11261 || addr == arg_pointer_rtx
11262 || addr == frame_pointer_rtx
11263 || addr == hard_frame_pointer_rtx)
11264 len = 1;
11267 /* Direct Addressing. */
11268 else if (disp && !base && !index)
11269 len = 4;
11271 else
11273 /* Find the length of the displacement constant. */
11274 if (disp)
11276 if (GET_CODE (disp) == CONST_INT
11277 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11278 && base)
11279 len = 1;
11280 else
11281 len = 4;
11283 /* ebp always wants a displacement. */
11284 else if (base == hard_frame_pointer_rtx)
11285 len = 1;
11287 /* An index requires the two-byte modrm form... */
11288 if (index
11289 /* ...like esp, which always wants an index. */
11290 || base == stack_pointer_rtx
11291 || base == arg_pointer_rtx
11292 || base == frame_pointer_rtx)
11293 len += 1;
11296 return len;
11299 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
11300 is set, expect that the insn has an 8-bit immediate alternative. */
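/* For example, "addl $3, %eax" fits the sign-extended 8-bit
   immediate form and is counted as one byte, while "addl $1000,
   %eax" needs a full 32-bit immediate and is counted as four;
   the 'K' constraint letter tested below accepts the signed
   8-bit range.  */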
11301 int
11302 ix86_attr_length_immediate_default (insn, shortform)
11303 rtx insn;
11304 int shortform;
11306 int len = 0;
11307 int i;
11308 extract_insn_cached (insn);
11309 for (i = recog_data.n_operands - 1; i >= 0; --i)
11310 if (CONSTANT_P (recog_data.operand[i]))
11312 if (len)
11313 abort ();
11314 if (shortform
11315 && GET_CODE (recog_data.operand[i]) == CONST_INT
11316 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11317 len = 1;
11318 else
11320 switch (get_attr_mode (insn))
11322 case MODE_QI:
11323 len+=1;
11324 break;
11325 case MODE_HI:
11326 len+=2;
11327 break;
11328 case MODE_SI:
11329 len+=4;
11330 break;
11331 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11332 case MODE_DI:
11333 len+=4;
11334 break;
11335 default:
11336 fatal_insn ("unknown insn mode", insn);
11340 return len;
11342 /* Compute default value for "length_address" attribute. */
11343 int
11344 ix86_attr_length_address_default (insn)
11345 rtx insn;
11347 int i;
11349 if (get_attr_type (insn) == TYPE_LEA)
11351 rtx set = PATTERN (insn);
11352 if (GET_CODE (set) == SET)
11353 ;
11354 else if (GET_CODE (set) == PARALLEL
11355 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11356 set = XVECEXP (set, 0, 0);
11357 else
11359 #ifdef ENABLE_CHECKING
11360 abort ();
11361 #endif
11362 return 0;
11365 return memory_address_length (SET_SRC (set));
11368 extract_insn_cached (insn);
11369 for (i = recog_data.n_operands - 1; i >= 0; --i)
11370 if (GET_CODE (recog_data.operand[i]) == MEM)
11372 return memory_address_length (XEXP (recog_data.operand[i], 0));
11373 break;
11375 return 0;
11378 /* Return the maximum number of instructions a cpu can issue. */
11380 static int
11381 ix86_issue_rate ()
11383 switch (ix86_cpu)
11385 case PROCESSOR_PENTIUM:
11386 case PROCESSOR_K6:
11387 return 2;
11389 case PROCESSOR_PENTIUMPRO:
11390 case PROCESSOR_PENTIUM4:
11391 case PROCESSOR_ATHLON:
11392 return 3;
11394 default:
11395 return 1;
11399 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11400 by DEP_INSN and nothing set by DEP_INSN. */
11402 static int
11403 ix86_flags_dependant (insn, dep_insn, insn_type)
11404 rtx insn, dep_insn;
11405 enum attr_type insn_type;
11407 rtx set, set2;
11409 /* Simplify the test for uninteresting insns. */
11410 if (insn_type != TYPE_SETCC
11411 && insn_type != TYPE_ICMOV
11412 && insn_type != TYPE_FCMOV
11413 && insn_type != TYPE_IBR)
11414 return 0;
11416 if ((set = single_set (dep_insn)) != 0)
11418 set = SET_DEST (set);
11419 set2 = NULL_RTX;
11421 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11422 && XVECLEN (PATTERN (dep_insn), 0) == 2
11423 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11424 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11426 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11427 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11429 else
11430 return 0;
11432 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11433 return 0;
11435 /* This test is true if the dependent insn reads the flags but
11436 not any other potentially set register. */
11437 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11438 return 0;
11440 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11441 return 0;
11443 return 1;
11446 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11447 address with operands set by DEP_INSN. */
11449 static int
11450 ix86_agi_dependant (insn, dep_insn, insn_type)
11451 rtx insn, dep_insn;
11452 enum attr_type insn_type;
11454 rtx addr;
11456 if (insn_type == TYPE_LEA
11457 && TARGET_PENTIUM)
11459 addr = PATTERN (insn);
11460 if (GET_CODE (addr) == SET)
11461 ;
11462 else if (GET_CODE (addr) == PARALLEL
11463 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11464 addr = XVECEXP (addr, 0, 0);
11465 else
11466 abort ();
11467 addr = SET_SRC (addr);
11469 else
11471 int i;
11472 extract_insn_cached (insn);
11473 for (i = recog_data.n_operands - 1; i >= 0; --i)
11474 if (GET_CODE (recog_data.operand[i]) == MEM)
11476 addr = XEXP (recog_data.operand[i], 0);
11477 goto found;
11479 return 0;
11480 found:;
11483 return modified_in_p (addr, dep_insn);
11486 static int
11487 ix86_adjust_cost (insn, link, dep_insn, cost)
11488 rtx insn, link, dep_insn;
11489 int cost;
11491 enum attr_type insn_type, dep_insn_type;
11492 enum attr_memory memory, dep_memory;
11493 rtx set, set2;
11494 int dep_insn_code_number;
11496 /* Anti and output dependencies have zero cost on all CPUs. */
11497 if (REG_NOTE_KIND (link) != 0)
11498 return 0;
11500 dep_insn_code_number = recog_memoized (dep_insn);
11502 /* If we can't recognize the insns, we can't really do anything. */
11503 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11504 return cost;
11506 insn_type = get_attr_type (insn);
11507 dep_insn_type = get_attr_type (dep_insn);
11509 switch (ix86_cpu)
11511 case PROCESSOR_PENTIUM:
11512 /* Address Generation Interlock adds a cycle of latency. */
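/* For example, "addl $4, %ebx" immediately followed by
   "movl (%ebx), %eax" pays this penalty while the load address
   is formed from the freshly written register.  */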
11513 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11514 cost += 1;
11516 /* ??? Compares pair with jump/setcc. */
11517 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11518 cost = 0;
11520 /* Floating point stores require the value to be ready one cycle earlier. */
11521 if (insn_type == TYPE_FMOV
11522 && get_attr_memory (insn) == MEMORY_STORE
11523 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11524 cost += 1;
11525 break;
11527 case PROCESSOR_PENTIUMPRO:
11528 memory = get_attr_memory (insn);
11529 dep_memory = get_attr_memory (dep_insn);
11531 /* Since we can't represent delayed latencies of load+operation,
11532 increase the cost here for non-imov insns. */
11533 if (dep_insn_type != TYPE_IMOV
11534 && dep_insn_type != TYPE_FMOV
11535 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11536 cost += 1;
11538 /* INT->FP conversion is expensive. */
11539 if (get_attr_fp_int_src (dep_insn))
11540 cost += 5;
11542 /* There is one cycle extra latency between an FP op and a store. */
11543 if (insn_type == TYPE_FMOV
11544 && (set = single_set (dep_insn)) != NULL_RTX
11545 && (set2 = single_set (insn)) != NULL_RTX
11546 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11547 && GET_CODE (SET_DEST (set2)) == MEM)
11548 cost += 1;
11550 /* Show the ability of the reorder buffer to hide the latency of a load by
11551 executing it in parallel with a previous instruction when that previous
11552 instruction is not needed to compute the address. */
11553 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11554 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11556 /* Claim that moves take one cycle, as the core can issue one load
11557 at a time and the next load can start a cycle later. */
11558 if (dep_insn_type == TYPE_IMOV
11559 || dep_insn_type == TYPE_FMOV)
11560 cost = 1;
11561 else if (cost > 1)
11562 cost--;
11564 break;
11566 case PROCESSOR_K6:
11567 memory = get_attr_memory (insn);
11568 dep_memory = get_attr_memory (dep_insn);
11569 /* The esp dependency is resolved before the instruction is really
11570 finished. */
11571 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11572 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11573 return 1;
11575 /* Since we can't represent delayed latencies of load+operation,
11576 increase the cost here for non-imov insns. */
11577 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11578 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11580 /* INT->FP conversion is expensive. */
11581 if (get_attr_fp_int_src (dep_insn))
11582 cost += 5;
11584 /* Show the ability of the reorder buffer to hide the latency of a load by
11585 executing it in parallel with a previous instruction when that previous
11586 instruction is not needed to compute the address. */
11587 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11588 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11590 /* Claim that moves take one cycle, as the core can issue one load
11591 at a time and the next load can start a cycle later. */
11592 if (dep_insn_type == TYPE_IMOV
11593 || dep_insn_type == TYPE_FMOV)
11594 cost = 1;
11595 else if (cost > 2)
11596 cost -= 2;
11597 else
11598 cost = 1;
11600 break;
11602 case PROCESSOR_ATHLON:
11603 memory = get_attr_memory (insn);
11604 dep_memory = get_attr_memory (dep_insn);
11606 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11608 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11609 cost += 2;
11610 else
11611 cost += 3;
11613 /* Show the ability of the reorder buffer to hide the latency of a load by
11614 executing it in parallel with a previous instruction when that previous
11615 instruction is not needed to compute the address. */
11616 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11617 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11619 /* Claim that moves take one cycle, as the core can issue one load
11620 at a time and the next load can start a cycle later. */
11621 if (dep_insn_type == TYPE_IMOV
11622 || dep_insn_type == TYPE_FMOV)
11623 cost = 0;
11624 else if (cost >= 3)
11625 cost -= 3;
11626 else
11627 cost = 0;
11630 default:
11631 break;
11634 return cost;
11637 static union
11639 struct ppro_sched_data
11641 rtx decode[3];
11642 int issued_this_cycle;
11643 } ppro;
11644 } ix86_sched_data;
11646 static enum attr_ppro_uops
11647 ix86_safe_ppro_uops (insn)
11648 rtx insn;
11650 if (recog_memoized (insn) >= 0)
11651 return get_attr_ppro_uops (insn);
11652 else
11653 return PPRO_UOPS_MANY;
11656 static void
11657 ix86_dump_ppro_packet (dump)
11658 FILE *dump;
11660 if (ix86_sched_data.ppro.decode[0])
11662 fprintf (dump, "PPRO packet: %d",
11663 INSN_UID (ix86_sched_data.ppro.decode[0]));
11664 if (ix86_sched_data.ppro.decode[1])
11665 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11666 if (ix86_sched_data.ppro.decode[2])
11667 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11668 fputc ('\n', dump);
11672 /* We're beginning a new block. Initialize data structures as necessary. */
11674 static void
11675 ix86_sched_init (dump, sched_verbose, veclen)
11676 FILE *dump ATTRIBUTE_UNUSED;
11677 int sched_verbose ATTRIBUTE_UNUSED;
11678 int veclen ATTRIBUTE_UNUSED;
11680 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11683 /* Shift INSN to SLOT, and shift everything else down. */
11685 static void
11686 ix86_reorder_insn (insnp, slot)
11687 rtx *insnp, *slot;
11689 if (insnp != slot)
11691 rtx insn = *insnp;
11693 insnp[0] = insnp[1];
11694 while (++insnp != slot);
11695 *insnp = insn;
11699 static void
11700 ix86_sched_reorder_ppro (ready, e_ready)
11701 rtx *ready;
11702 rtx *e_ready;
11704 rtx decode[3];
11705 enum attr_ppro_uops cur_uops;
11706 int issued_this_cycle;
11707 rtx *insnp;
11708 int i;
11710 /* At this point .ppro.decode contains the state of the three
11711 decoders from last "cycle". That is, those insns that were
11712 actually independent. But here we're scheduling for the
11713 decoder, and we may find things that are decodable in the
11714 same cycle. */
11716 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11717 issued_this_cycle = 0;
11719 insnp = e_ready;
11720 cur_uops = ix86_safe_ppro_uops (*insnp);
11722 /* If the decoders are empty, and we have a complex insn at the
11723 head of the priority queue, let it issue without complaint. */
11724 if (decode[0] == NULL)
11726 if (cur_uops == PPRO_UOPS_MANY)
11728 decode[0] = *insnp;
11729 goto ppro_done;
11732 /* Otherwise, search for a 2-4 uop insn to issue. */
11733 while (cur_uops != PPRO_UOPS_FEW)
11735 if (insnp == ready)
11736 break;
11737 cur_uops = ix86_safe_ppro_uops (*--insnp);
11740 /* If so, move it to the head of the line. */
11741 if (cur_uops == PPRO_UOPS_FEW)
11742 ix86_reorder_insn (insnp, e_ready);
11744 /* Issue the head of the queue. */
11745 issued_this_cycle = 1;
11746 decode[0] = *e_ready--;
11749 /* Look for simple insns to fill in the other two slots. */
11750 for (i = 1; i < 3; ++i)
11751 if (decode[i] == NULL)
11753 if (ready > e_ready)
11754 goto ppro_done;
11756 insnp = e_ready;
11757 cur_uops = ix86_safe_ppro_uops (*insnp);
11758 while (cur_uops != PPRO_UOPS_ONE)
11760 if (insnp == ready)
11761 break;
11762 cur_uops = ix86_safe_ppro_uops (*--insnp);
11765 /* Found one. Move it to the head of the queue and issue it. */
11766 if (cur_uops == PPRO_UOPS_ONE)
11768 ix86_reorder_insn (insnp, e_ready);
11769 decode[i] = *e_ready--;
11770 issued_this_cycle++;
11771 continue;
11774 /* ??? Didn't find one. Ideally, here we would do a lazy split
11775 of 2-uop insns, issue one and queue the other. */
11778 ppro_done:
11779 if (issued_this_cycle == 0)
11780 issued_this_cycle = 1;
11781 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11784 /* We are about to begin issuing insns for this clock cycle.
11785 Override the default sort algorithm to better slot instructions. */
11786 static int
11787 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11788 FILE *dump ATTRIBUTE_UNUSED;
11789 int sched_verbose ATTRIBUTE_UNUSED;
11790 rtx *ready;
11791 int *n_readyp;
11792 int clock_var ATTRIBUTE_UNUSED;
11794 int n_ready = *n_readyp;
11795 rtx *e_ready = ready + n_ready - 1;
11797 /* Make sure to go ahead and initialize key items in
11798 ix86_sched_data if we are not going to bother trying to
11799 reorder the ready queue. */
11800 if (n_ready < 2)
11802 ix86_sched_data.ppro.issued_this_cycle = 1;
11803 goto out;
11806 switch (ix86_cpu)
11808 default:
11809 break;
11811 case PROCESSOR_PENTIUMPRO:
11812 ix86_sched_reorder_ppro (ready, e_ready);
11813 break;
11816 out:
11817 return ix86_issue_rate ();
11820 /* We are about to issue INSN. Return the number of insns left on the
11821 ready queue that can be issued this cycle. */
11823 static int
11824 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11825 FILE *dump;
11826 int sched_verbose;
11827 rtx insn;
11828 int can_issue_more;
11830 int i;
11831 switch (ix86_cpu)
11833 default:
11834 return can_issue_more - 1;
11836 case PROCESSOR_PENTIUMPRO:
11838 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11840 if (uops == PPRO_UOPS_MANY)
11842 if (sched_verbose)
11843 ix86_dump_ppro_packet (dump);
11844 ix86_sched_data.ppro.decode[0] = insn;
11845 ix86_sched_data.ppro.decode[1] = NULL;
11846 ix86_sched_data.ppro.decode[2] = NULL;
11847 if (sched_verbose)
11848 ix86_dump_ppro_packet (dump);
11849 ix86_sched_data.ppro.decode[0] = NULL;
11851 else if (uops == PPRO_UOPS_FEW)
11853 if (sched_verbose)
11854 ix86_dump_ppro_packet (dump);
11855 ix86_sched_data.ppro.decode[0] = insn;
11856 ix86_sched_data.ppro.decode[1] = NULL;
11857 ix86_sched_data.ppro.decode[2] = NULL;
11859 else
11861 for (i = 0; i < 3; ++i)
11862 if (ix86_sched_data.ppro.decode[i] == NULL)
11864 ix86_sched_data.ppro.decode[i] = insn;
11865 break;
11867 if (i == 3)
11868 abort ();
11869 if (i == 2)
11871 if (sched_verbose)
11872 ix86_dump_ppro_packet (dump);
11873 ix86_sched_data.ppro.decode[0] = NULL;
11874 ix86_sched_data.ppro.decode[1] = NULL;
11875 ix86_sched_data.ppro.decode[2] = NULL;
11879 return --ix86_sched_data.ppro.issued_this_cycle;
11883 static int
11884 ia32_use_dfa_pipeline_interface ()
11886 if (ix86_cpu == PROCESSOR_PENTIUM)
11887 return 1;
11888 return 0;
11891 /* How many alternative schedules to try. This should be as wide as the
11892 scheduling freedom in the DFA, but no wider. Making this value too
11893 large results in extra work for the scheduler. */
11895 static int
11896 ia32_multipass_dfa_lookahead ()
11898 if (ix86_cpu == PROCESSOR_PENTIUM)
11899 return 2;
11900 else
11901 return 0;
11905 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11906 SRCREG, and set the memory attributes to those of DSTREF and SRCREF, as
11907 appropriate. */
11909 void
11910 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11911 rtx insns;
11912 rtx dstref, srcref, dstreg, srcreg;
11914 rtx insn;
11916 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11917 if (INSN_P (insn))
11918 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11919 dstreg, srcreg);
11922 /* Subroutine of above to actually do the updating by recursively walking
11923 the rtx. */
11925 static void
11926 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11927 rtx x;
11928 rtx dstref, srcref, dstreg, srcreg;
11930 enum rtx_code code = GET_CODE (x);
11931 const char *format_ptr = GET_RTX_FORMAT (code);
11932 int i, j;
11934 if (code == MEM && XEXP (x, 0) == dstreg)
11935 MEM_COPY_ATTRIBUTES (x, dstref);
11936 else if (code == MEM && XEXP (x, 0) == srcreg)
11937 MEM_COPY_ATTRIBUTES (x, srcref);
11939 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11941 if (*format_ptr == 'e')
11942 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11943 dstreg, srcreg);
11944 else if (*format_ptr == 'E')
11945 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11946 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11947 dstreg, srcreg);
11951 /* Compute the alignment given to a constant that is being placed in memory.
11952 EXP is the constant and ALIGN is the alignment that the object would
11953 ordinarily have.
11954 The value of this function is used instead of that alignment to align
11955 the object. */
11957 int
11958 ix86_constant_alignment (exp, align)
11959 tree exp;
11960 int align;
11962 if (TREE_CODE (exp) == REAL_CST)
11964 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11965 return 64;
11966 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11967 return 128;
11969 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11970 && align < 256)
11971 return 256;
11973 return align;
11976 /* Compute the alignment for a static variable.
11977 TYPE is the data type, and ALIGN is the alignment that
11978 the object would ordinarily have. The value of this function is used
11979 instead of that alignment to align the object. */
11981 int
11982 ix86_data_alignment (type, align)
11983 tree type;
11984 int align;
11986 if (AGGREGATE_TYPE_P (type)
11987 && TYPE_SIZE (type)
11988 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11989 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11990 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11991 return 256;
11993 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11994 to a 16-byte boundary. */
11995 if (TARGET_64BIT)
11997 if (AGGREGATE_TYPE_P (type)
11998 && TYPE_SIZE (type)
11999 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12000 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12001 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12002 return 128;
12005 if (TREE_CODE (type) == ARRAY_TYPE)
12007 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12008 return 64;
12009 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12010 return 128;
12012 else if (TREE_CODE (type) == COMPLEX_TYPE)
12015 if (TYPE_MODE (type) == DCmode && align < 64)
12016 return 64;
12017 if (TYPE_MODE (type) == XCmode && align < 128)
12018 return 128;
12020 else if ((TREE_CODE (type) == RECORD_TYPE
12021 || TREE_CODE (type) == UNION_TYPE
12022 || TREE_CODE (type) == QUAL_UNION_TYPE)
12023 && TYPE_FIELDS (type))
12025 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12026 return 64;
12027 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12028 return 128;
12030 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12031 || TREE_CODE (type) == INTEGER_TYPE)
12033 if (TYPE_MODE (type) == DFmode && align < 64)
12034 return 64;
12035 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12036 return 128;
12039 return align;
12042 /* Compute the alignment for a local variable.
12043 TYPE is the data type, and ALIGN is the alignment that
12044 the object would ordinarily have. The value of this macro is used
12045 instead of that alignment to align the object. */
12047 int
12048 ix86_local_alignment (type, align)
12049 tree type;
12050 int align;
12052 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12053 to a 16-byte boundary. */
12054 if (TARGET_64BIT)
12056 if (AGGREGATE_TYPE_P (type)
12057 && TYPE_SIZE (type)
12058 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12059 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12060 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12061 return 128;
12063 if (TREE_CODE (type) == ARRAY_TYPE)
12065 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12066 return 64;
12067 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12068 return 128;
12070 else if (TREE_CODE (type) == COMPLEX_TYPE)
12072 if (TYPE_MODE (type) == DCmode && align < 64)
12073 return 64;
12074 if (TYPE_MODE (type) == XCmode && align < 128)
12075 return 128;
12077 else if ((TREE_CODE (type) == RECORD_TYPE
12078 || TREE_CODE (type) == UNION_TYPE
12079 || TREE_CODE (type) == QUAL_UNION_TYPE)
12080 && TYPE_FIELDS (type))
12082 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12083 return 64;
12084 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12085 return 128;
12087 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12088 || TREE_CODE (type) == INTEGER_TYPE)
12091 if (TYPE_MODE (type) == DFmode && align < 64)
12092 return 64;
12093 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12094 return 128;
12096 return align;
12099 /* Emit RTL insns to initialize the variable parts of a trampoline.
12100 FNADDR is an RTX for the address of the function's pure code.
12101 CXT is an RTX for the static chain value for the function. */
12102 void
12103 x86_initialize_trampoline (tramp, fnaddr, cxt)
12104 rtx tramp, fnaddr, cxt;
12106 if (!TARGET_64BIT)
12108 /* Compute offset from the end of the jmp to the target function. */
12109 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12110 plus_constant (tramp, 10),
12111 NULL_RTX, 1, OPTAB_DIRECT);
12112 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12113 gen_int_mode (0xb9, QImode));
12114 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12115 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12116 gen_int_mode (0xe9, QImode));
12117 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
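/* The resulting 10-byte trampoline disassembles as:
       movl $cxt, %ecx     byte 0xb9 followed by 4 immediate bytes
       jmp  fnaddr         byte 0xe9 followed by 4 displacement bytes
   where the displacement is relative to the end of the jmp,
   i.e. tramp + 10, matching the DISP computed above.  */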
12119 else
12121 int offset = 0;
12122 /* Try to load the address using the shorter movl instead of movabs.
12123 We may want to support movq for kernel mode, but the kernel does not use
12124 trampolines at the moment. */
12125 if (x86_64_zero_extended_value (fnaddr))
12127 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12128 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12129 gen_int_mode (0xbb41, HImode));
12130 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12131 gen_lowpart (SImode, fnaddr));
12132 offset += 6;
12134 else
12136 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12137 gen_int_mode (0xbb49, HImode));
12138 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12139 fnaddr);
12140 offset += 10;
12142 /* Load static chain using movabs to r10. */
12143 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12144 gen_int_mode (0xba49, HImode));
12145 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12146 cxt);
12147 offset += 10;
12148 /* Jump to r11. */
12149 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12150 gen_int_mode (0xff49, HImode));
12151 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12152 gen_int_mode (0xe3, QImode));
12153 offset += 3;
12154 if (offset > TRAMPOLINE_SIZE)
12155 abort ();
12158 #ifdef TRANSFER_FROM_TRAMPOLINE
12159 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12160 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12161 #endif
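/* Editorial sketch (not part of the original source): the bytes
   emitted above decode as follows, which makes the gen_int_mode
   constants easier to audit.

   32-bit trampoline (10 bytes):
       offset 0:  b9 <cxt:4>         movl  $cxt, %ecx
       offset 5:  e9 <disp:4>        jmp   fnaddr
   where disp = fnaddr - (tramp + 10), i.e. relative to the end of
   the jmp.

   64-bit trampoline:
       41 bb <fnaddr:4>              movl  $fnaddr, %r11d
   when fnaddr zero-extends from 32 bits, otherwise
       49 bb <fnaddr:8>              movabs $fnaddr, %r11
   followed in both cases by
       49 ba <cxt:8>                 movabs $cxt, %r10
       49 ff e3                      jmp   *%r11  */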
12164 #define def_builtin(MASK, NAME, TYPE, CODE) \
12165 do { \
12166 if ((MASK) & target_flags \
12167 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12168 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12169 NULL, NULL_TREE); \
12170 } while (0)
12172 struct builtin_description
12173 {
12174 const unsigned int mask;
12175 const enum insn_code icode;
12176 const char *const name;
12177 const enum ix86_builtins code;
12178 const enum rtx_code comparison;
12179 const unsigned int flag;
12180 };
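/* Editorial annotation (not part of the original source): each entry
   in the tables below packs the enabling target mask, the insn
   pattern, the user-visible name (0 for builtins registered by hand
   with def_builtin later), the IX86_BUILTIN_* code, and, for the
   compare builtins, the RTL comparison code plus a flag asking for
   the operands to be swapped at expansion time.  For example

       { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps",
         IX86_BUILTIN_CMPGTPS, LT, 1 }

   implements "greater than" as "less than" with exchanged operands.  */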
12182 /* Used for builtins that are enabled both by -msse and -msse2. */
12183 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12184 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12185 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
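/* Editorial example (not part of the original source) of how these
   masks interact with def_builtin above: an entry registered as

       def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64",
                    int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);

   is created when -msse or -msse2 is in target_flags and the compiler
   is in 64-bit mode; because MASK_SSE164 includes MASK_64BIT, the
   same call is a no-op for -m32 even with -msse2.  */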
12187 static const struct builtin_description bdesc_comi[] =
12188 {
12189 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12190 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12191 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12192 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12193 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12194 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12195 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12196 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12197 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12198 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12199 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12200 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12201 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12202 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12203 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12204 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12205 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12206 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12207 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12208 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12209 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12210 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12211 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12212 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12215 static const struct builtin_description bdesc_2arg[] =
12216 {
12217 /* SSE */
12218 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12219 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12220 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12221 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12222 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12223 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12224 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12225 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12227 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12228 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12229 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12230 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12231 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12232 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12233 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12234 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12235 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12236 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12237 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12238 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12239 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12240 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12241 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12242 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12243 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12244 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12245 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12246 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12248 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12249 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12250 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12251 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12253 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12254 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12255 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12256 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12258 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12259 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12260 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12261 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12262 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12264 /* MMX */
12265 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12266 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12267 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12268 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12269 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12270 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12271 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12272 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12274 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12275 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12276 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12277 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12278 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12279 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12280 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12281 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12283 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12284 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12285 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12287 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12288 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12289 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12290 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12292 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12293 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12295 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12296 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12297 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12298 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12299 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12300 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12302 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12303 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12304 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12305 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12307 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12308 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12309 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12310 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12311 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12312 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12314 /* Special. */
12315 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12316 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12317 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12319 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12320 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12321 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12323 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12324 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12325 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12326 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12327 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12328 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12330 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12331 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12332 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12333 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12334 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12335 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12337 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12338 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12339 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12340 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12342 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12343 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12345 /* SSE2 */
12346 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12347 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12348 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12349 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12350 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12351 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12352 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12353 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12355 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12356 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12357 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12358 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12359 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12360 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12361 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12362 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12363 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12364 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12365 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12366 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12367 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12368 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12369 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12370 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12371 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12372 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12373 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12374 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12376 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12377 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12378 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12379 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12381 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12382 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12383 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12384 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12386 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12387 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12388 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12390 /* SSE2 MMX */
12391 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12392 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12393 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12394 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12395 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12396 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12397 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12398 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12400 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12401 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12402 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12403 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12404 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12405 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12406 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12407 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12409 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12410 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12411 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12412 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12414 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12415 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12416 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12417 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12419 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12420 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12422 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12423 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12424 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12425 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12426 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12427 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12429 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12430 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12431 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12432 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12434 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12435 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12436 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12437 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12438 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12439 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12440 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12441 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12443 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12444 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12445 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12447 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12448 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12450 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12451 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12452 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12453 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12454 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12455 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12457 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12458 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12459 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12460 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12461 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12462 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12464 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12465 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12466 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12467 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12469 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12471 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12472 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12473 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12474 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12475 };
12477 static const struct builtin_description bdesc_1arg[] =
12478 {
12479 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12480 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12482 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12483 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12484 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12486 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12487 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12488 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12489 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12490 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12491 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12495 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12496 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12498 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12500 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12501 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12503 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12504 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12505 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12507 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12509 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12511 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12513 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12514 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12521 };
12523 void
12524 ix86_init_builtins ()
12525 {
12526 if (TARGET_MMX)
12527 ix86_init_mmx_sse_builtins ();
12528 }
12530 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12531 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12532 builtins. */
12533 static void
12534 ix86_init_mmx_sse_builtins ()
12535 {
12536 const struct builtin_description * d;
12537 size_t i;
12539 tree pchar_type_node = build_pointer_type (char_type_node);
12540 tree pcchar_type_node = build_pointer_type (
12541 build_type_variant (char_type_node, 1, 0));
12542 tree pfloat_type_node = build_pointer_type (float_type_node);
12543 tree pcfloat_type_node = build_pointer_type (
12544 build_type_variant (float_type_node, 1, 0));
12545 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12546 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12547 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12549 /* Comparisons. */
12550 tree int_ftype_v4sf_v4sf
12551 = build_function_type_list (integer_type_node,
12552 V4SF_type_node, V4SF_type_node, NULL_TREE);
12553 tree v4si_ftype_v4sf_v4sf
12554 = build_function_type_list (V4SI_type_node,
12555 V4SF_type_node, V4SF_type_node, NULL_TREE);
12556 /* MMX/SSE/integer conversions. */
12557 tree int_ftype_v4sf
12558 = build_function_type_list (integer_type_node,
12559 V4SF_type_node, NULL_TREE);
12560 tree int64_ftype_v4sf
12561 = build_function_type_list (long_long_integer_type_node,
12562 V4SF_type_node, NULL_TREE);
12563 tree int_ftype_v8qi
12564 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12565 tree v4sf_ftype_v4sf_int
12566 = build_function_type_list (V4SF_type_node,
12567 V4SF_type_node, integer_type_node, NULL_TREE);
12568 tree v4sf_ftype_v4sf_int64
12569 = build_function_type_list (V4SF_type_node,
12570 V4SF_type_node, long_long_integer_type_node,
12571 NULL_TREE);
12572 tree v4sf_ftype_v4sf_v2si
12573 = build_function_type_list (V4SF_type_node,
12574 V4SF_type_node, V2SI_type_node, NULL_TREE);
12575 tree int_ftype_v4hi_int
12576 = build_function_type_list (integer_type_node,
12577 V4HI_type_node, integer_type_node, NULL_TREE);
12578 tree v4hi_ftype_v4hi_int_int
12579 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12580 integer_type_node, integer_type_node,
12581 NULL_TREE);
12582 /* Miscellaneous. */
12583 tree v8qi_ftype_v4hi_v4hi
12584 = build_function_type_list (V8QI_type_node,
12585 V4HI_type_node, V4HI_type_node, NULL_TREE);
12586 tree v4hi_ftype_v2si_v2si
12587 = build_function_type_list (V4HI_type_node,
12588 V2SI_type_node, V2SI_type_node, NULL_TREE);
12589 tree v4sf_ftype_v4sf_v4sf_int
12590 = build_function_type_list (V4SF_type_node,
12591 V4SF_type_node, V4SF_type_node,
12592 integer_type_node, NULL_TREE);
12593 tree v2si_ftype_v4hi_v4hi
12594 = build_function_type_list (V2SI_type_node,
12595 V4HI_type_node, V4HI_type_node, NULL_TREE);
12596 tree v4hi_ftype_v4hi_int
12597 = build_function_type_list (V4HI_type_node,
12598 V4HI_type_node, integer_type_node, NULL_TREE);
12599 tree v4hi_ftype_v4hi_di
12600 = build_function_type_list (V4HI_type_node,
12601 V4HI_type_node, long_long_unsigned_type_node,
12602 NULL_TREE);
12603 tree v2si_ftype_v2si_di
12604 = build_function_type_list (V2SI_type_node,
12605 V2SI_type_node, long_long_unsigned_type_node,
12606 NULL_TREE);
12607 tree void_ftype_void
12608 = build_function_type (void_type_node, void_list_node);
12609 tree void_ftype_unsigned
12610 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12611 tree unsigned_ftype_void
12612 = build_function_type (unsigned_type_node, void_list_node);
12613 tree di_ftype_void
12614 = build_function_type (long_long_unsigned_type_node, void_list_node);
12615 tree v4sf_ftype_void
12616 = build_function_type (V4SF_type_node, void_list_node);
12617 tree v2si_ftype_v4sf
12618 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12619 /* Loads/stores. */
12620 tree void_ftype_v8qi_v8qi_pchar
12621 = build_function_type_list (void_type_node,
12622 V8QI_type_node, V8QI_type_node,
12623 pchar_type_node, NULL_TREE);
12624 tree v4sf_ftype_pcfloat
12625 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12626 /* @@@ the type is bogus */
12627 tree v4sf_ftype_v4sf_pv2si
12628 = build_function_type_list (V4SF_type_node,
12629 V4SF_type_node, pv2si_type_node, NULL_TREE);
12630 tree void_ftype_pv2si_v4sf
12631 = build_function_type_list (void_type_node,
12632 pv2si_type_node, V4SF_type_node, NULL_TREE);
12633 tree void_ftype_pfloat_v4sf
12634 = build_function_type_list (void_type_node,
12635 pfloat_type_node, V4SF_type_node, NULL_TREE);
12636 tree void_ftype_pdi_di
12637 = build_function_type_list (void_type_node,
12638 pdi_type_node, long_long_unsigned_type_node,
12639 NULL_TREE);
12640 tree void_ftype_pv2di_v2di
12641 = build_function_type_list (void_type_node,
12642 pv2di_type_node, V2DI_type_node, NULL_TREE);
12643 /* Normal vector unops. */
12644 tree v4sf_ftype_v4sf
12645 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12647 /* Normal vector binops. */
12648 tree v4sf_ftype_v4sf_v4sf
12649 = build_function_type_list (V4SF_type_node,
12650 V4SF_type_node, V4SF_type_node, NULL_TREE);
12651 tree v8qi_ftype_v8qi_v8qi
12652 = build_function_type_list (V8QI_type_node,
12653 V8QI_type_node, V8QI_type_node, NULL_TREE);
12654 tree v4hi_ftype_v4hi_v4hi
12655 = build_function_type_list (V4HI_type_node,
12656 V4HI_type_node, V4HI_type_node, NULL_TREE);
12657 tree v2si_ftype_v2si_v2si
12658 = build_function_type_list (V2SI_type_node,
12659 V2SI_type_node, V2SI_type_node, NULL_TREE);
12660 tree di_ftype_di_di
12661 = build_function_type_list (long_long_unsigned_type_node,
12662 long_long_unsigned_type_node,
12663 long_long_unsigned_type_node, NULL_TREE);
12665 tree v2si_ftype_v2sf
12666 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12667 tree v2sf_ftype_v2si
12668 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12669 tree v2si_ftype_v2si
12670 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12671 tree v2sf_ftype_v2sf
12672 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12673 tree v2sf_ftype_v2sf_v2sf
12674 = build_function_type_list (V2SF_type_node,
12675 V2SF_type_node, V2SF_type_node, NULL_TREE);
12676 tree v2si_ftype_v2sf_v2sf
12677 = build_function_type_list (V2SI_type_node,
12678 V2SF_type_node, V2SF_type_node, NULL_TREE);
12679 tree pint_type_node = build_pointer_type (integer_type_node);
12680 tree pcint_type_node = build_pointer_type (
12681 build_type_variant (integer_type_node, 1, 0));
12682 tree pdouble_type_node = build_pointer_type (double_type_node);
12683 tree pcdouble_type_node = build_pointer_type (
12684 build_type_variant (double_type_node, 1, 0));
12685 tree int_ftype_v2df_v2df
12686 = build_function_type_list (integer_type_node,
12687 V2DF_type_node, V2DF_type_node, NULL_TREE);
12689 tree ti_ftype_void
12690 = build_function_type (intTI_type_node, void_list_node);
12691 tree v2di_ftype_void
12692 = build_function_type (V2DI_type_node, void_list_node);
12693 tree ti_ftype_ti_ti
12694 = build_function_type_list (intTI_type_node,
12695 intTI_type_node, intTI_type_node, NULL_TREE);
12696 tree void_ftype_pcvoid
12697 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12698 tree v2di_ftype_di
12699 = build_function_type_list (V2DI_type_node,
12700 long_long_unsigned_type_node, NULL_TREE);
12701 tree di_ftype_v2di
12702 = build_function_type_list (long_long_unsigned_type_node,
12703 V2DI_type_node, NULL_TREE);
12704 tree v4sf_ftype_v4si
12705 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12706 tree v4si_ftype_v4sf
12707 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12708 tree v2df_ftype_v4si
12709 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12710 tree v4si_ftype_v2df
12711 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12712 tree v2si_ftype_v2df
12713 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12714 tree v4sf_ftype_v2df
12715 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12716 tree v2df_ftype_v2si
12717 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12718 tree v2df_ftype_v4sf
12719 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12720 tree int_ftype_v2df
12721 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12722 tree int64_ftype_v2df
12723 = build_function_type_list (long_long_integer_type_node,
12724 V2DF_type_node, NULL_TREE);
12725 tree v2df_ftype_v2df_int
12726 = build_function_type_list (V2DF_type_node,
12727 V2DF_type_node, integer_type_node, NULL_TREE);
12728 tree v2df_ftype_v2df_int64
12729 = build_function_type_list (V2DF_type_node,
12730 V2DF_type_node, long_long_integer_type_node,
12731 NULL_TREE);
12732 tree v4sf_ftype_v4sf_v2df
12733 = build_function_type_list (V4SF_type_node,
12734 V4SF_type_node, V2DF_type_node, NULL_TREE);
12735 tree v2df_ftype_v2df_v4sf
12736 = build_function_type_list (V2DF_type_node,
12737 V2DF_type_node, V4SF_type_node, NULL_TREE);
12738 tree v2df_ftype_v2df_v2df_int
12739 = build_function_type_list (V2DF_type_node,
12740 V2DF_type_node, V2DF_type_node,
12741 integer_type_node,
12742 NULL_TREE);
12743 tree v2df_ftype_v2df_pv2si
12744 = build_function_type_list (V2DF_type_node,
12745 V2DF_type_node, pv2si_type_node, NULL_TREE);
12746 tree void_ftype_pv2si_v2df
12747 = build_function_type_list (void_type_node,
12748 pv2si_type_node, V2DF_type_node, NULL_TREE);
12749 tree void_ftype_pdouble_v2df
12750 = build_function_type_list (void_type_node,
12751 pdouble_type_node, V2DF_type_node, NULL_TREE);
12752 tree void_ftype_pint_int
12753 = build_function_type_list (void_type_node,
12754 pint_type_node, integer_type_node, NULL_TREE);
12755 tree void_ftype_v16qi_v16qi_pchar
12756 = build_function_type_list (void_type_node,
12757 V16QI_type_node, V16QI_type_node,
12758 pchar_type_node, NULL_TREE);
12759 tree v2df_ftype_pcdouble
12760 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12761 tree v2df_ftype_v2df_v2df
12762 = build_function_type_list (V2DF_type_node,
12763 V2DF_type_node, V2DF_type_node, NULL_TREE);
12764 tree v16qi_ftype_v16qi_v16qi
12765 = build_function_type_list (V16QI_type_node,
12766 V16QI_type_node, V16QI_type_node, NULL_TREE);
12767 tree v8hi_ftype_v8hi_v8hi
12768 = build_function_type_list (V8HI_type_node,
12769 V8HI_type_node, V8HI_type_node, NULL_TREE);
12770 tree v4si_ftype_v4si_v4si
12771 = build_function_type_list (V4SI_type_node,
12772 V4SI_type_node, V4SI_type_node, NULL_TREE);
12773 tree v2di_ftype_v2di_v2di
12774 = build_function_type_list (V2DI_type_node,
12775 V2DI_type_node, V2DI_type_node, NULL_TREE);
12776 tree v2di_ftype_v2df_v2df
12777 = build_function_type_list (V2DI_type_node,
12778 V2DF_type_node, V2DF_type_node, NULL_TREE);
12779 tree v2df_ftype_v2df
12780 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12781 tree v2df_ftype_double
12782 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12783 tree v2df_ftype_double_double
12784 = build_function_type_list (V2DF_type_node,
12785 double_type_node, double_type_node, NULL_TREE);
12786 tree int_ftype_v8hi_int
12787 = build_function_type_list (integer_type_node,
12788 V8HI_type_node, integer_type_node, NULL_TREE);
12789 tree v8hi_ftype_v8hi_int_int
12790 = build_function_type_list (V8HI_type_node,
12791 V8HI_type_node, integer_type_node,
12792 integer_type_node, NULL_TREE);
12793 tree v2di_ftype_v2di_int
12794 = build_function_type_list (V2DI_type_node,
12795 V2DI_type_node, integer_type_node, NULL_TREE);
12796 tree v4si_ftype_v4si_int
12797 = build_function_type_list (V4SI_type_node,
12798 V4SI_type_node, integer_type_node, NULL_TREE);
12799 tree v8hi_ftype_v8hi_int
12800 = build_function_type_list (V8HI_type_node,
12801 V8HI_type_node, integer_type_node, NULL_TREE);
12802 tree v8hi_ftype_v8hi_v2di
12803 = build_function_type_list (V8HI_type_node,
12804 V8HI_type_node, V2DI_type_node, NULL_TREE);
12805 tree v4si_ftype_v4si_v2di
12806 = build_function_type_list (V4SI_type_node,
12807 V4SI_type_node, V2DI_type_node, NULL_TREE);
12808 tree v4si_ftype_v8hi_v8hi
12809 = build_function_type_list (V4SI_type_node,
12810 V8HI_type_node, V8HI_type_node, NULL_TREE);
12811 tree di_ftype_v8qi_v8qi
12812 = build_function_type_list (long_long_unsigned_type_node,
12813 V8QI_type_node, V8QI_type_node, NULL_TREE);
12814 tree v2di_ftype_v16qi_v16qi
12815 = build_function_type_list (V2DI_type_node,
12816 V16QI_type_node, V16QI_type_node, NULL_TREE);
12817 tree int_ftype_v16qi
12818 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12819 tree v16qi_ftype_pcchar
12820 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12821 tree void_ftype_pchar_v16qi
12822 = build_function_type_list (void_type_node,
12823 pchar_type_node, V16QI_type_node, NULL_TREE);
12824 tree v4si_ftype_pcint
12825 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12826 tree void_ftype_pcint_v4si
12827 = build_function_type_list (void_type_node,
12828 pcint_type_node, V4SI_type_node, NULL_TREE);
12829 tree v2di_ftype_v2di
12830 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12832 /* Add all builtins that are more or less simple operations on two
12833 operands. */
12834 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12835 {
12836 /* Use one of the operands; the target can have a different mode for
12837 mask-generating compares. */
12838 enum machine_mode mode;
12839 tree type;
12841 if (d->name == 0)
12842 continue;
12843 mode = insn_data[d->icode].operand[1].mode;
12845 switch (mode)
12846 {
12847 case V16QImode:
12848 type = v16qi_ftype_v16qi_v16qi;
12849 break;
12850 case V8HImode:
12851 type = v8hi_ftype_v8hi_v8hi;
12852 break;
12853 case V4SImode:
12854 type = v4si_ftype_v4si_v4si;
12855 break;
12856 case V2DImode:
12857 type = v2di_ftype_v2di_v2di;
12858 break;
12859 case V2DFmode:
12860 type = v2df_ftype_v2df_v2df;
12861 break;
12862 case TImode:
12863 type = ti_ftype_ti_ti;
12864 break;
12865 case V4SFmode:
12866 type = v4sf_ftype_v4sf_v4sf;
12867 break;
12868 case V8QImode:
12869 type = v8qi_ftype_v8qi_v8qi;
12870 break;
12871 case V4HImode:
12872 type = v4hi_ftype_v4hi_v4hi;
12873 break;
12874 case V2SImode:
12875 type = v2si_ftype_v2si_v2si;
12876 break;
12877 case DImode:
12878 type = di_ftype_di_di;
12879 break;
12881 default:
12882 abort ();
12883 }
12885 /* Override for comparisons. */
12886 if (d->icode == CODE_FOR_maskcmpv4sf3
12887 || d->icode == CODE_FOR_maskncmpv4sf3
12888 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12889 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12890 type = v4si_ftype_v4sf_v4sf;
12892 if (d->icode == CODE_FOR_maskcmpv2df3
12893 || d->icode == CODE_FOR_maskncmpv2df3
12894 || d->icode == CODE_FOR_vmmaskcmpv2df3
12895 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12896 type = v2di_ftype_v2df_v2df;
12898 def_builtin (d->mask, d->name, type, d->code);
12899 }
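/* Editorial example (not part of the original source) of what the
   loop above registers: for { MASK_SSE1, CODE_FOR_addv4sf3,
   "__builtin_ia32_addps", ... } the V4SFmode operand selects
   v4sf_ftype_v4sf_v4sf, while the mask-generating compare patterns
   (maskcmpv4sf3 and friends) are overridden to return an integer
   vector, v4si_ftype_v4sf_v4sf or v2di_ftype_v2df_v2df.  */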
12901 /* Add the remaining MMX insns with somewhat more complicated types. */
12902 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12903 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12904 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12905 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12906 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12908 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12909 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12910 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12912 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12913 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12915 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12916 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12918 /* comi/ucomi insns. */
12919 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12920 if (d->mask == MASK_SSE2)
12921 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12922 else
12923 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12925 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12926 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12927 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12929 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12930 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12931 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12932 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12933 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12934 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12935 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12936 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12937 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12938 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12939 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12941 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12942 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12944 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12946 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12947 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12948 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12949 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12950 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12951 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12953 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12954 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12955 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12956 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12958 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12959 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12960 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12961 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12963 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12965 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12967 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12968 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12969 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12970 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12971 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12972 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12974 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12976 /* Original 3DNow! */
12977 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12978 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12979 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12980 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12981 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12982 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12983 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12984 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12985 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12986 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12987 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12988 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12989 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12990 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12991 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12992 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12993 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12994 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12995 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12996 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12998 /* 3DNow! extension as used in the Athlon CPU. */
12999 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13000 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13001 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13002 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13003 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13004 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13006 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13008 /* SSE2 */
13009 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13010 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13012 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13013 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13014 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13016 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13017 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13018 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13019 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13020 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13021 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13023 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13024 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13025 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13026 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13028 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13029 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13030 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13031 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13032 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13034 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13035 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13036 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13037 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13039 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13040 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13042 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13044 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13045 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13047 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13048 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13049 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13050 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13051 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13053 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13055 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

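/* An illustrative (hypothetical) trigger: given a source error such as

     __v4sf v = __builtin_ia32_addps (undeclared_a, undeclared_b);

   the front end diagnoses the bad operands and expand_expr hands back
   const0_rtx for them; substituting a cleared vector register here lets
   the expanders below continue past the error without crashing.  */
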
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

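/* Sketch of a typical path through this helper (the icode comes from
   the bdesc_2arg table; CODE_FOR_addv4hi3 for __builtin_ia32_paddw is
   assumed here purely for illustration):

     pat = GEN_FCN (CODE_FOR_addv4hi3) (target, op0, op1);
       == gen_addv4hi3 (target, op0, op1);

   after both operands have been forced into whatever the insn's
   operand predicates will accept.  */
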
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

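/* Illustrative call (hypothetical variables p and v): for
   __builtin_ia32_storeups (p, v), arg0 is the pointer and arg1 the
   vector, so op0 becomes a MEM at the pointer's value while op1 is
   forced into a register, matching the (set (mem) (reg)) shape of the
   store patterns.  Stores produce no value, hence the 0 result.  */
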
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

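/* Illustrative contrast between the two modes of this helper:
   __builtin_ia32_loadaps (p) arrives with DO_LOAD set, so op0 is
   dereferenced as a MEM in the insn's input mode, whereas a pure unop
   such as __builtin_ia32_pf2id (v) passes DO_LOAD == 0 and keeps op0
   as an ordinary register-class operand.  */
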
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

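/* Why op1 duplicates op0: the vm* patterns model the scalar semantics
   of sqrtss/rsqrtss/rcpss, where only element 0 is computed and the
   remaining elements of the result come from another source operand.
   For __builtin_ia32_sqrtss (a) both sources are A, so the result is
   roughly { sqrtf (a[0]), a[1], a[2], a[3] } (a sketch of the
   semantics, not the exact RTL).  */
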
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

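/* Example of the swap: SSE provides CMPLTPS but no CMPGTPS, so a
   greater-than builtin is listed in the bdesc tables with a
   less-than-style icode and a nonzero flag; the exchange above then
   emits "a > b" as "b < a".  (The exact table encoding is assumed
   from its use here.)  */
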
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

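/* Sketch of the emitted sequence (register names illustrative only):

     xorl    %eax, %eax        # clear the result register
     ucomiss %xmm1, %xmm0      # comi pattern: sets EFLAGS only
     sete    %al               # STRICT_LOW_PART set of the low byte

   i.e. the comparison is materialized as a 0/1 value in the low byte
   of a zeroed SImode register, which is what gets returned.  */
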
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
                  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

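/* Only the irregular builtins need the switch above; anything with a
   plain unop/binop/comi shape is table-driven.  A bdesc_2arg row looks
   roughly like

     { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw",
       IX86_BUILTIN_PADDW, 0, 0 }

   (field layout inferred from its use here: mask, icode, name, code,
   comparison, flag), and the loops above dispatch on the CODE field.  */
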
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

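/* Illustrative output (assembly sketch): with the 64-bit red zone
   available, forcing %rax to memory is simply

     movq %rax, -128(%rsp)

   since the area below the stack pointer is guaranteed not to be
   clobbered, no adjustment of %rsp is needed; the other branches push
   through the PRE_DEC forms instead.  */
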
/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

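/* Example: the x87 can materialize only 0.0 and 1.0 directly (fldz and
   fld1, which is what standard_80387_constant_p checks), so for those
   a float class is kept; any other CONST_DOUBLE headed for an SSE
   class gets NO_REGS and is therefore loaded from the constant pool.  */
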
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative in
   the machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}
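
/* Example: a DFmode copy between SSE and x87 registers, or between SSE
   and general registers, must bounce through a stack slot; only SImode
   can move directly between the SSE/MMX units and the integer unit
   (via movd), which is the (mode) != SImode condition above.  */
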
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In the case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
            ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

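/* Example of the QImode special case: only %al, %bl, %cl and %dl
   (regno < 4 above) have byte subregisters on IA-32, so a QImode value
   is allowed to live in, say, %esi only during or after reload, or
   when TARGET_PARTIAL_REG_STALL says the stall is not a concern.  */
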
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * ((int) GET_MODE_SIZE (mode)
                 + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
}

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array, as we do not allocate some
     registers at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (function)
     tree function;
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in %eax.  */
      if (parm)
        return gen_rtx_REG (SImode, 0);
    }

  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

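/* Example (hypothetical 32-bit declaration):

     struct S { int f (int) __attribute__ ((regparm (3))); };

   the implicit this pointer is the first argument, so it arrives in
   %eax (register 0 above); a varargs method keeps everything on the
   stack, which is why the scan looks for a terminating void_type_node.
   Without regparm, this sits at 4(%esp) on entry, or at 8(%esp) when a
   hidden aggregate-return pointer is pushed first.  */
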
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is non-zero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = XEXP (xops[0], 0);
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}

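/* Illustrative 32-bit output (assembly sketch, non-PIC, no regparm):
   for DELTA == -8 and VCALL_OFFSET == 0 the whole thunk body is

     addl $-8, 4(%esp)
     jmp  f

   while a nonzero VCALL_OFFSET first pulls this into %eax so the
   vtable slot at *(*this + vcall_offset) can be read through %ecx and
   added in before the tail jump.  */
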
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

static bool
ix86_ms_bitfield_layout_p (record_type)
     tree record_type ATTRIBUTE_UNUSED;
{
  return TARGET_USE_MS_BITFIELD_LAYOUT;
}

int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (file, labelno)
     FILE *file;
     int labelno;
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}

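/* Illustrative 32-bit output (assembly sketch; MCOUNT_NAME and
   PROFILE_COUNT_REGISTER are configuration macros, values assumed):

     movl $LP0, %edx           # non-PIC: counter label for mcount
     call mcount

   PIC code instead forms the label %ebx-relative via @GOTOFF and calls
   *mcount@GOT(%ebx), since mcount may live in a shared object.  */
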
/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or is
   not directly preceded by another jump instruction.  We avoid the
   penalty by inserting a NOP just before the RET instructions in such
   cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
        continue;
      prev = prev_nonnote_insn (ret);
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index > 0
                && !(e->flags & EDGE_FALLTHRU))
              insert = 1;
        }
      if (!insert)
        {
          prev = prev_real_insn (ret);
          if (prev && GET_CODE (prev) == JUMP_INSN
              && any_condjump_p (prev))
            insert = 1;
        }
      if (insert)
        emit_insn_before (gen_nop (), ret);
    }
}

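/* Illustrative transformation (assembly sketch): a hot epilogue such as

     jne .L2                          jne .L2
     ret             becomes          nop
                                      ret

   so the return is no longer decoded immediately after a jump or a
   branch-target label, avoiding the Athlon penalty described above.  */
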
#include "gt-i386.h"