i386.c (x86_promote_QImode): Set for Athlon
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost;
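/* Illustrative sketch, not part of the original source: the cost tables above
   are consumed through the ix86_cost pointer, so a cost query for a multiply
   by a constant with N bits set would look roughly like

       cost = ix86_cost->mult_init + n_bits_set * ix86_cost->mult_bit;

   The field names used here are assumptions inferred from the comments above;
   the authoritative definition of struct processor_costs lives in i386.h.  */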
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
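/* Illustrative sketch, not part of the original source: each x86_* mask above
   is a bitset indexed by processor type, and a tuning decision is made by
   testing it against the CPU currently being tuned for, roughly

       if (x86_promote_QImode & (1 << (int) ix86_cpu))
         ... promote QImode operations to a wider mode ...

   In practice these tests are wrapped in TARGET_* macros defined in i386.h.  */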
399 /* In case the average insn count for a single function invocation is
400 lower than this constant, emit fast (but longer) prologue and
401 epilogue code. */
402 #define FAST_PROLOGUE_INSN_COUNT 30
404 /* Set by prologue expander and used by epilogue expander to determine
405 the style used. */
406 static int use_fast_prologue_epilogue;
408 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
410 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
411 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
412 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
413 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
415 /* Array of the smallest class containing reg number REGNO, indexed by
416 REGNO. Used by REGNO_REG_CLASS in i386.h. */
418 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
420 /* ax, dx, cx, bx */
421 AREG, DREG, CREG, BREG,
422 /* si, di, bp, sp */
423 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
424 /* FP registers */
425 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
426 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
427 /* arg pointer */
428 NON_Q_REGS,
429 /* flags, fpsr, dirflag, frame */
430 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
431 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
432 SSE_REGS, SSE_REGS,
433 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
434 MMX_REGS, MMX_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
437 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
438 SSE_REGS, SSE_REGS,
441 /* The "default" register map used in 32bit mode. */
443 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
445 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
446 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
447 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
448 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
449 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
454 static int const x86_64_int_parameter_registers[6] =
456 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
457 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
460 static int const x86_64_int_return_registers[4] =
462 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
465 /* The "default" register map used in 64bit mode. */
466 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
468 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
469 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
470 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
471 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
472 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
473 8,9,10,11,12,13,14,15, /* extended integer registers */
474 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
477 /* Define the register numbers to be used in Dwarf debugging information.
478 The SVR4 reference port C compiler uses the following register numbers
479 in its Dwarf output code:
480 0 for %eax (gcc regno = 0)
481 1 for %ecx (gcc regno = 2)
482 2 for %edx (gcc regno = 1)
483 3 for %ebx (gcc regno = 3)
484 4 for %esp (gcc regno = 7)
485 5 for %ebp (gcc regno = 6)
486 6 for %esi (gcc regno = 4)
487 7 for %edi (gcc regno = 5)
488 The following three DWARF register numbers are never generated by
489 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
490 believes these numbers have these meanings.
491 8 for %eip (no gcc equivalent)
492 9 for %eflags (gcc regno = 17)
493 10 for %trapno (no gcc equivalent)
494 It is not at all clear how we should number the FP stack registers
495 for the x86 architecture. If the version of SDB on x86/svr4 were
496 a bit less brain dead with respect to floating-point then we would
497 have a precedent to follow with respect to DWARF register numbers
498 for x86 FP registers, but the SDB on x86/svr4 is so completely
499 broken with respect to FP registers that it is hardly worth thinking
500 of it as something to strive for compatibility with.
501 The version of x86/svr4 SDB I have at the moment does (partially)
502 seem to believe that DWARF register number 11 is associated with
503 the x86 register %st(0), but that's about all. Higher DWARF
504 register numbers don't seem to be associated with anything in
505 particular, and even for DWARF regno 11, SDB only seems to under-
506 stand that it should say that a variable lives in %st(0) (when
507 asked via an `=' command) if we said it was in DWARF regno 11,
508 but SDB still prints garbage when asked for the value of the
509 variable in question (via a `/' command).
510 (Also note that the labels SDB prints for various FP stack regs
511 when doing an `x' command are all wrong.)
512 Note that these problems generally don't affect the native SVR4
513 C compiler because it doesn't allow the use of -O with -g and
514 because when it is *not* optimizing, it allocates a memory
515 location for each floating-point variable, and the memory
516 location is what gets described in the DWARF AT_location
517 attribute for the variable in question.
518 Regardless of the severe mental illness of the x86/svr4 SDB, we
519 do something sensible here and we use the following DWARF
520 register numbers. Note that these are all stack-top-relative
521 numbers.
522 11 for %st(0) (gcc regno = 8)
523 12 for %st(1) (gcc regno = 9)
524 13 for %st(2) (gcc regno = 10)
525 14 for %st(3) (gcc regno = 11)
526 15 for %st(4) (gcc regno = 12)
527 16 for %st(5) (gcc regno = 13)
528 17 for %st(6) (gcc regno = 14)
529 18 for %st(7) (gcc regno = 15)
531 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
533 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
534 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
535 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
536 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
537 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
539 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
542 /* Test and compare insns in i386.md store the information needed to
543 generate branch and scc insns here. */
545 rtx ix86_compare_op0 = NULL_RTX;
546 rtx ix86_compare_op1 = NULL_RTX;
548 /* The encoding characters for the four TLS models present in ELF. */
550 static char const tls_model_chars[] = " GLil";
552 #define MAX_386_STACK_LOCALS 3
553 /* Size of the register save area. */
554 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
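/* Worked example (illustrative, not from the original source): with the
   x86-64 values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8
   assumed here, X86_64_VARARGS_SIZE works out to 6 * 8 + 8 * 16 = 176 bytes
   of register save area.  */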
556 /* Define the structure for the machine field in struct function. */
557 struct machine_function GTY(())
559 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
560 const char *some_ld_name;
561 int save_varrargs_registers;
562 int accesses_prev_frame;
565 #define ix86_stack_locals (cfun->machine->stack_locals)
566 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
568 /* Structure describing stack frame layout.
569 Stack grows downward:
571 [arguments]
572 <- ARG_POINTER
573 saved pc
575 saved frame pointer if frame_pointer_needed
576 <- HARD_FRAME_POINTER
577 [saved regs]
579 [padding1] \
581 [va_arg registers] (
582 > to_allocate <- FRAME_POINTER
583 [frame] (
585 [padding2] /
587 struct ix86_frame
589 int nregs;
590 int padding1;
591 int va_arg_size;
592 HOST_WIDE_INT frame;
593 int padding2;
594 int outgoing_arguments_size;
595 int red_zone_size;
597 HOST_WIDE_INT to_allocate;
598 /* The offsets relative to ARG_POINTER. */
599 HOST_WIDE_INT frame_pointer_offset;
600 HOST_WIDE_INT hard_frame_pointer_offset;
601 HOST_WIDE_INT stack_pointer_offset;
604 /* Used to enable/disable debugging features. */
605 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
606 /* Code model option as passed by user. */
607 const char *ix86_cmodel_string;
608 /* Parsed value. */
609 enum cmodel ix86_cmodel;
610 /* Asm dialect. */
611 const char *ix86_asm_string;
612 enum asm_dialect ix86_asm_dialect = ASM_ATT;
613 /* TLS dialect. */
614 const char *ix86_tls_dialect_string;
615 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
617 /* Which unit we are generating floating point math for. */
618 enum fpmath_unit ix86_fpmath;
620 /* Which cpu we are scheduling for. */
621 enum processor_type ix86_cpu;
622 /* Which instruction set architecture to use. */
623 enum processor_type ix86_arch;
625 /* Strings to hold which cpu and instruction set architecture to use. */
626 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
627 const char *ix86_arch_string; /* for -march=<xxx> */
628 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
630 /* # of registers to use to pass arguments. */
631 const char *ix86_regparm_string;
633 /* true if sse prefetch instruction is not NOOP. */
634 int x86_prefetch_sse;
636 /* ix86_regparm_string as a number */
637 int ix86_regparm;
639 /* Alignment to use for loops and jumps: */
641 /* Power of two alignment for loops. */
642 const char *ix86_align_loops_string;
644 /* Power of two alignment for non-loop jumps. */
645 const char *ix86_align_jumps_string;
647 /* Power of two alignment for stack boundary in bytes. */
648 const char *ix86_preferred_stack_boundary_string;
650 /* Preferred alignment for stack boundary in bits. */
651 int ix86_preferred_stack_boundary;
653 /* Values 1-5: see jump.c */
654 int ix86_branch_cost;
655 const char *ix86_branch_cost_string;
657 /* Power of two alignment for functions. */
658 const char *ix86_align_funcs_string;
660 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
661 static char internal_label_prefix[16];
662 static int internal_label_prefix_len;
664 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
665 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
666 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
667 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
668 int, int, FILE *));
669 static const char *get_some_local_dynamic_name PARAMS ((void));
670 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
671 static rtx maybe_get_pool_constant PARAMS ((rtx));
672 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
673 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
674 rtx *, rtx *));
675 static rtx get_thread_pointer PARAMS ((void));
676 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
677 static rtx gen_push PARAMS ((rtx));
678 static int memory_address_length PARAMS ((rtx addr));
679 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
681 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
682 static void ix86_dump_ppro_packet PARAMS ((FILE *));
683 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
684 static struct machine_function * ix86_init_machine_status PARAMS ((void));
685 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
686 static int ix86_nsaved_regs PARAMS ((void));
687 static void ix86_emit_save_regs PARAMS ((void));
688 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
689 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
690 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
691 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
692 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
693 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
694 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
695 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
696 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
697 static int ix86_issue_rate PARAMS ((void));
698 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
699 static void ix86_sched_init PARAMS ((FILE *, int, int));
700 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
701 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
702 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
703 static int ia32_multipass_dfa_lookahead PARAMS ((void));
704 static void ix86_init_mmx_sse_builtins PARAMS ((void));
706 struct ix86_address
708 rtx base, index, disp;
709 HOST_WIDE_INT scale;
712 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
714 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
715 static const char *ix86_strip_name_encoding PARAMS ((const char *))
716 ATTRIBUTE_UNUSED;
718 struct builtin_description;
719 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
720 tree, rtx));
721 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
722 tree, rtx));
723 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
724 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
725 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
726 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
727 tree, rtx));
728 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
729 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
730 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
731 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
732 enum rtx_code *,
733 enum rtx_code *,
734 enum rtx_code *));
735 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
736 rtx *, rtx *));
737 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
740 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
741 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
742 static int ix86_save_reg PARAMS ((unsigned int, int));
743 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
744 static int ix86_comp_type_attributes PARAMS ((tree, tree));
745 const struct attribute_spec ix86_attribute_table[];
746 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
749 #ifdef DO_GLOBAL_CTORS_BODY
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
753 /* Register class used for passing a given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of the SSESF and SSEDF classes, which are basically the SSE class, except that
756 gcc will use an SF or DFmode move instead of a DImode one to avoid reformatting penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (the upper half contains padding).
761 enum x86_64_reg_class
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
851 struct gcc_target targetm = TARGET_INITIALIZER;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
862 void
863 override_options ()
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
869 static struct ptt
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
881 const processor_target_table[PROCESSOR_max] =
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
907 const processor_alias_table[] =
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"i686", PROCESSOR_PENTIUMPRO, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
917 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
918 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
919 PTA_MMX | PTA_PREFETCH_SSE},
920 {"k6", PROCESSOR_K6, PTA_MMX},
921 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
922 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
924 | PTA_3DNOW_A},
925 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
926 | PTA_3DNOW | PTA_3DNOW_A},
927 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 | PTA_3DNOW_A | PTA_SSE},
929 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
935 int const pta_size = ARRAY_SIZE (processor_alias_table);
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS;
939 #endif
941 if (!ix86_cpu_string && ix86_arch_string)
942 ix86_cpu_string = ix86_arch_string;
943 if (!ix86_cpu_string)
944 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
945 if (!ix86_arch_string)
946 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
948 if (ix86_cmodel_string != 0)
950 if (!strcmp (ix86_cmodel_string, "small"))
951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
952 else if (flag_pic)
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
954 else if (!strcmp (ix86_cmodel_string, "32"))
955 ix86_cmodel = CM_32;
956 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
957 ix86_cmodel = CM_KERNEL;
958 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
959 ix86_cmodel = CM_MEDIUM;
960 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
961 ix86_cmodel = CM_LARGE;
962 else
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
965 else
967 ix86_cmodel = CM_32;
968 if (TARGET_64BIT)
969 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 if (ix86_asm_string != 0)
973 if (!strcmp (ix86_asm_string, "intel"))
974 ix86_asm_dialect = ASM_INTEL;
975 else if (!strcmp (ix86_asm_string, "att"))
976 ix86_asm_dialect = ASM_ATT;
977 else
978 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
983 if (ix86_cmodel == CM_LARGE)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags & MASK_64BIT) ? 64 : 32);
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 ix86_arch = processor_alias_table[i].processor;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu = ix86_arch;
995 if (processor_alias_table[i].flags & PTA_MMX
996 && !(target_flags & MASK_MMX_SET))
997 target_flags |= MASK_MMX;
998 if (processor_alias_table[i].flags & PTA_3DNOW
999 && !(target_flags & MASK_3DNOW_SET))
1000 target_flags |= MASK_3DNOW;
1001 if (processor_alias_table[i].flags & PTA_3DNOW_A
1002 && !(target_flags & MASK_3DNOW_A_SET))
1003 target_flags |= MASK_3DNOW_A;
1004 if (processor_alias_table[i].flags & PTA_SSE
1005 && !(target_flags & MASK_SSE_SET))
1006 target_flags |= MASK_SSE;
1007 if (processor_alias_table[i].flags & PTA_SSE2
1008 && !(target_flags & MASK_SSE2_SET))
1009 target_flags |= MASK_SSE2;
1010 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1011 x86_prefetch_sse = true;
1012 break;
1015 if (i == pta_size)
1016 error ("bad value (%s) for -march= switch", ix86_arch_string);
1018 for (i = 0; i < pta_size; i++)
1019 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 ix86_cpu = processor_alias_table[i].processor;
1022 break;
1024 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1025 x86_prefetch_sse = true;
1026 if (i == pta_size)
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1029 if (optimize_size)
1030 ix86_cost = &size_cost;
1031 else
1032 ix86_cost = processor_target_table[ix86_cpu].cost;
1033 target_flags |= processor_target_table[ix86_cpu].target_enable;
1034 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status = ix86_init_machine_status;
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string)
1042 i = atoi (ix86_regparm_string);
1043 if (i < 0 || i > REGPARM_MAX)
1044 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1045 else
1046 ix86_regparm = i;
1048 else
1049 if (TARGET_64BIT)
1050 ix86_regparm = REGPARM_MAX;
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string)
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops == 0)
1060 i = atoi (ix86_align_loops_string);
1061 if (i < 0 || i > MAX_CODE_ALIGN)
1062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1063 else
1064 align_loops = 1 << i;
1068 if (ix86_align_jumps_string)
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps == 0)
1073 i = atoi (ix86_align_jumps_string);
1074 if (i < 0 || i > MAX_CODE_ALIGN)
1075 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1076 else
1077 align_jumps = 1 << i;
1081 if (ix86_align_funcs_string)
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions == 0)
1086 i = atoi (ix86_align_funcs_string);
1087 if (i < 0 || i > MAX_CODE_ALIGN)
1088 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1089 else
1090 align_functions = 1 << i;
1094 /* Default align_* from the processor table. */
1095 if (align_loops == 0)
1097 align_loops = processor_target_table[ix86_cpu].align_loop;
1098 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 if (align_jumps == 0)
1102 align_jumps = processor_target_table[ix86_cpu].align_jump;
1103 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 if (align_functions == 0)
1107 align_functions = processor_target_table[ix86_cpu].align_func;
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary = (optimize_size
1115 ? TARGET_64BIT ? 64 : 32
1116 : 128);
1117 if (ix86_preferred_stack_boundary_string)
1119 i = atoi (ix86_preferred_stack_boundary_string);
1120 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1122 TARGET_64BIT ? 3 : 2);
1123 else
1124 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
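 /* Worked example (illustrative, not from the original source):
    -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT = 128 bits,
    i.e. a 16 byte aligned stack.  */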
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1129 if (ix86_branch_cost_string)
1131 i = atoi (ix86_branch_cost_string);
1132 if (i < 0 || i > 5)
1133 error ("-mbranch-cost=%d is not between 0 and 5", i);
1134 else
1135 ix86_branch_cost = i;
1138 if (ix86_tls_dialect_string)
1140 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1141 ix86_tls_dialect = TLS_DIALECT_GNU;
1142 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1143 ix86_tls_dialect = TLS_DIALECT_SUN;
1144 else
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string);
1149 /* Keep nonleaf frame pointers. */
1150 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1151 flag_omit_frame_pointer = 1;
1153 /* If we're doing fast math, we don't care about comparison order
1154 wrt NaNs. This lets us use a shorter comparison sequence. */
1155 if (flag_unsafe_math_optimizations)
1156 target_flags &= ~MASK_IEEE_FP;
1158 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1159 since the insns won't need emulation. */
1160 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1161 target_flags &= ~MASK_NO_FANCY_MATH_387;
1163 if (TARGET_64BIT)
1165 if (TARGET_ALIGN_DOUBLE)
1166 error ("-malign-double makes no sense in the 64bit mode");
1167 if (TARGET_RTD)
1168 error ("-mrtd calling convention not supported in the 64bit mode");
1169 /* Enable by default the SSE and MMX builtins. */
1170 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1171 ix86_fpmath = FPMATH_SSE;
1173 else
1174 ix86_fpmath = FPMATH_387;
1176 if (ix86_fpmath_string != 0)
1178 if (! strcmp (ix86_fpmath_string, "387"))
1179 ix86_fpmath = FPMATH_387;
1180 else if (! strcmp (ix86_fpmath_string, "sse"))
1182 if (!TARGET_SSE)
1184 warning ("SSE instruction set disabled, using 387 arithmetics");
1185 ix86_fpmath = FPMATH_387;
1187 else
1188 ix86_fpmath = FPMATH_SSE;
1190 else if (! strcmp (ix86_fpmath_string, "387,sse")
1191 || ! strcmp (ix86_fpmath_string, "sse,387"))
1193 if (!TARGET_SSE)
1195 warning ("SSE instruction set disabled, using 387 arithmetics");
1196 ix86_fpmath = FPMATH_387;
1198 else if (!TARGET_80387)
1200 warning ("387 instruction set disabled, using SSE arithmetics");
1201 ix86_fpmath = FPMATH_SSE;
1203 else
1204 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1206 else
1207 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1210 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1211 on by -msse. */
1212 if (TARGET_SSE)
1214 target_flags |= MASK_MMX;
1215 x86_prefetch_sse = true;
1218 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1219 if (TARGET_3DNOW)
1221 target_flags |= MASK_MMX;
1222 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1223 extensions it adds. */
1224 if (x86_3dnow_a & (1 << ix86_arch))
1225 target_flags |= MASK_3DNOW_A;
1227 if ((x86_accumulate_outgoing_args & CPUMASK)
1228 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1229 && !optimize_size)
1230 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1232 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1234 char *p;
1235 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1236 p = strchr (internal_label_prefix, 'X');
1237 internal_label_prefix_len = p - internal_label_prefix;
1238 *p = '\0';
1242 void
1243 optimization_options (level, size)
1244 int level;
1245 int size ATTRIBUTE_UNUSED;
1247 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1248 make the problem with not enough registers even worse. */
1249 #ifdef INSN_SCHEDULING
1250 if (level > 1)
1251 flag_schedule_insns = 0;
1252 #endif
1253 if (TARGET_64BIT && optimize >= 1)
1254 flag_omit_frame_pointer = 1;
1255 if (TARGET_64BIT)
1257 flag_pcc_struct_return = 0;
1258 flag_asynchronous_unwind_tables = 1;
1262 /* Table of valid machine attributes. */
1263 const struct attribute_spec ix86_attribute_table[] =
1265 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1266 /* Stdcall attribute says callee is responsible for popping arguments
1267 if they are not variable. */
1268 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1269 /* Cdecl attribute says the callee is a normal C declaration */
1270 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1271 /* Regparm attribute specifies how many integer arguments are to be
1272 passed in registers. */
1273 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1274 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1275 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1276 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1278 #endif
1279 { NULL, 0, 0, false, false, false, NULL }
1282 /* Handle a "cdecl" or "stdcall" attribute;
1283 arguments as in struct attribute_spec.handler. */
1284 static tree
1285 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1286 tree *node;
1287 tree name;
1288 tree args ATTRIBUTE_UNUSED;
1289 int flags ATTRIBUTE_UNUSED;
1290 bool *no_add_attrs;
1292 if (TREE_CODE (*node) != FUNCTION_TYPE
1293 && TREE_CODE (*node) != METHOD_TYPE
1294 && TREE_CODE (*node) != FIELD_DECL
1295 && TREE_CODE (*node) != TYPE_DECL)
1297 warning ("`%s' attribute only applies to functions",
1298 IDENTIFIER_POINTER (name));
1299 *no_add_attrs = true;
1302 if (TARGET_64BIT)
1304 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1305 *no_add_attrs = true;
1308 return NULL_TREE;
1311 /* Handle a "regparm" attribute;
1312 arguments as in struct attribute_spec.handler. */
1313 static tree
1314 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1315 tree *node;
1316 tree name;
1317 tree args;
1318 int flags ATTRIBUTE_UNUSED;
1319 bool *no_add_attrs;
1321 if (TREE_CODE (*node) != FUNCTION_TYPE
1322 && TREE_CODE (*node) != METHOD_TYPE
1323 && TREE_CODE (*node) != FIELD_DECL
1324 && TREE_CODE (*node) != TYPE_DECL)
1326 warning ("`%s' attribute only applies to functions",
1327 IDENTIFIER_POINTER (name));
1328 *no_add_attrs = true;
1330 else
1332 tree cst;
1334 cst = TREE_VALUE (args);
1335 if (TREE_CODE (cst) != INTEGER_CST)
1337 warning ("`%s' attribute requires an integer constant argument",
1338 IDENTIFIER_POINTER (name));
1339 *no_add_attrs = true;
1341 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1343 warning ("argument to `%s' attribute larger than %d",
1344 IDENTIFIER_POINTER (name), REGPARM_MAX);
1345 *no_add_attrs = true;
1349 return NULL_TREE;
1352 /* Return 0 if the attributes for two types are incompatible, 1 if they
1353 are compatible, and 2 if they are nearly compatible (which causes a
1354 warning to be generated). */
1356 static int
1357 ix86_comp_type_attributes (type1, type2)
1358 tree type1;
1359 tree type2;
1361 /* Check for mismatch of non-default calling convention. */
1362 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1364 if (TREE_CODE (type1) != FUNCTION_TYPE)
1365 return 1;
1367 /* Check for mismatched return types (cdecl vs stdcall). */
1368 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1369 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1370 return 0;
1371 return 1;
1374 /* Value is the number of bytes of arguments automatically
1375 popped when returning from a subroutine call.
1376 FUNDECL is the declaration node of the function (as a tree),
1377 FUNTYPE is the data type of the function (as a tree),
1378 or for a library call it is an identifier node for the subroutine name.
1379 SIZE is the number of bytes of arguments passed on the stack.
1381 On the 80386, the RTD insn may be used to pop them if the number
1382 of args is fixed, but if the number is variable then the caller
1383 must pop them all. RTD can't be used for library calls now
1384 because the library is compiled with the Unix compiler.
1385 Use of RTD is a selectable option, since it is incompatible with
1386 standard Unix calling sequences. If the option is not selected,
1387 the caller must always pop the args.
1389 The attribute stdcall is equivalent to RTD on a per module basis. */
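/* Example (illustrative, not from the original source): for a 32 bit
   declaration such as

       int __attribute__ ((stdcall)) f (int a, int b);

   the callee pops its own 8 bytes of arguments (e.g. via "ret $8"), so this
   function returns SIZE; for an ordinary cdecl function it returns 0 and the
   caller adjusts the stack after the call.  */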
1392 ix86_return_pops_args (fundecl, funtype, size)
1393 tree fundecl;
1394 tree funtype;
1395 int size;
1397 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1399 /* Cdecl functions override -mrtd, and never pop the stack. */
1400 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1402 /* Stdcall functions will pop the stack if not variable args. */
1403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1404 rtd = 1;
1406 if (rtd
1407 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1408 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1409 == void_type_node)))
1410 return size;
1413 /* Lose any fake structure return argument if it is passed on the stack. */
1414 if (aggregate_value_p (TREE_TYPE (funtype))
1415 && !TARGET_64BIT)
1417 int nregs = ix86_regparm;
1419 if (funtype)
1421 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1423 if (attr)
1424 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1427 if (!nregs)
1428 return GET_MODE_SIZE (Pmode);
1431 return 0;
1434 /* Argument support functions. */
1436 /* Return true when register may be used to pass function parameters. */
1437 bool
1438 ix86_function_arg_regno_p (regno)
1439 int regno;
1441 int i;
1442 if (!TARGET_64BIT)
1443 return (regno < REGPARM_MAX
1444 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1445 if (SSE_REGNO_P (regno) && TARGET_SSE)
1446 return true;
1447 /* RAX is used as hidden argument to va_arg functions. */
1448 if (!regno)
1449 return true;
1450 for (i = 0; i < REGPARM_MAX; i++)
1451 if (regno == x86_64_int_parameter_registers[i])
1452 return true;
1453 return false;
1456 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1457 for a call to a function whose data type is FNTYPE.
1458 For a library call, FNTYPE is 0. */
1460 void
1461 init_cumulative_args (cum, fntype, libname)
1462 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1463 tree fntype; /* tree ptr for function decl */
1464 rtx libname; /* SYMBOL_REF of library name or 0 */
1466 static CUMULATIVE_ARGS zero_cum;
1467 tree param, next_param;
1469 if (TARGET_DEBUG_ARG)
1471 fprintf (stderr, "\ninit_cumulative_args (");
1472 if (fntype)
1473 fprintf (stderr, "fntype code = %s, ret code = %s",
1474 tree_code_name[(int) TREE_CODE (fntype)],
1475 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1476 else
1477 fprintf (stderr, "no fntype");
1479 if (libname)
1480 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1483 *cum = zero_cum;
1485 /* Set up the number of registers to use for passing arguments. */
1486 cum->nregs = ix86_regparm;
1487 cum->sse_nregs = SSE_REGPARM_MAX;
1488 if (fntype && !TARGET_64BIT)
1490 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1492 if (attr)
1493 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1495 cum->maybe_vaarg = false;
1497 /* Determine if this function has variable arguments. This is
1498 indicated by the last argument being 'void_type_node' if there
1499 are no variable arguments. If there are variable arguments, then
1500 we won't pass anything in registers */
1502 if (cum->nregs)
1504 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1505 param != 0; param = next_param)
1507 next_param = TREE_CHAIN (param);
1508 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1510 if (!TARGET_64BIT)
1511 cum->nregs = 0;
1512 cum->maybe_vaarg = true;
1516 if ((!fntype && !libname)
1517 || (fntype && !TYPE_ARG_TYPES (fntype)))
1518 cum->maybe_vaarg = 1;
1520 if (TARGET_DEBUG_ARG)
1521 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1523 return;
1526 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1527 of this code is to classify each 8 bytes of the incoming argument by the register
1528 class and assign registers accordingly. */
1530 /* Return the union class of CLASS1 and CLASS2.
1531 See the x86-64 PS ABI for details. */
1533 static enum x86_64_reg_class
1534 merge_classes (class1, class2)
1535 enum x86_64_reg_class class1, class2;
1537 /* Rule #1: If both classes are equal, this is the resulting class. */
1538 if (class1 == class2)
1539 return class1;
1541 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1542 the other class. */
1543 if (class1 == X86_64_NO_CLASS)
1544 return class2;
1545 if (class2 == X86_64_NO_CLASS)
1546 return class1;
1548 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1549 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1550 return X86_64_MEMORY_CLASS;
1552 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1553 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1554 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1555 return X86_64_INTEGERSI_CLASS;
1556 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1557 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1558 return X86_64_INTEGER_CLASS;
1560 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1561 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1562 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1563 return X86_64_MEMORY_CLASS;
1565 /* Rule #6: Otherwise class SSE is used. */
1566 return X86_64_SSE_CLASS;
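/* Worked example (illustrative, not from the original source): for

       struct example { double d; int i; };

   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second as
   X86_64_INTEGERSI_CLASS, so under the merging rules above the structure is
   passed in one SSE register and one general purpose register.  */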
1569 /* Classify the argument of type TYPE and mode MODE.
1570 CLASSES will be filled by the register class used to pass each word
1571 of the operand. The number of words is returned. In case the parameter
1572 should be passed in memory, 0 is returned. As a special case for zero
1573 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1575 BIT_OFFSET is used internally for handling records and specifies the
1576 offset in bits modulo 256 to avoid overflow cases.
1578 See the x86-64 PS ABI for details.
1581 static int
1582 classify_argument (mode, type, classes, bit_offset)
1583 enum machine_mode mode;
1584 tree type;
1585 enum x86_64_reg_class classes[MAX_CLASSES];
1586 int bit_offset;
1588 int bytes =
1589 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1590 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1592 if (type && AGGREGATE_TYPE_P (type))
1594 int i;
1595 tree field;
1596 enum x86_64_reg_class subclasses[MAX_CLASSES];
1598 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1599 if (bytes > 16)
1600 return 0;
1602 for (i = 0; i < words; i++)
1603 classes[i] = X86_64_NO_CLASS;
1605 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1606 signal the memory class, so handle this as a special case. */
1607 if (!words)
1609 classes[0] = X86_64_NO_CLASS;
1610 return 1;
1613 /* Classify each field of record and merge classes. */
1614 if (TREE_CODE (type) == RECORD_TYPE)
1616 /* For C++ classes, first merge in the fields of the base classes. */
1617 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1619 tree bases = TYPE_BINFO_BASETYPES (type);
1620 int n_bases = TREE_VEC_LENGTH (bases);
1621 int i;
1623 for (i = 0; i < n_bases; ++i)
1625 tree binfo = TREE_VEC_ELT (bases, i);
1626 int num;
1627 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1628 tree type = BINFO_TYPE (binfo);
1630 num = classify_argument (TYPE_MODE (type),
1631 type, subclasses,
1632 (offset + bit_offset) % 256);
1633 if (!num)
1634 return 0;
1635 for (i = 0; i < num; i++)
1637 int pos = (offset + bit_offset) / 8 / 8;
1638 classes[i + pos] =
1639 merge_classes (subclasses[i], classes[i + pos]);
1643 /* And now merge the fields of structure. */
1644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1646 if (TREE_CODE (field) == FIELD_DECL)
1648 int num;
1650 /* Bitfields are always classified as integer. Handle them
1651 early, since later code would consider them to be
1652 misaligned integers. */
1653 if (DECL_BIT_FIELD (field))
1655 for (i = int_bit_position (field) / 8 / 8;
1656 i < (int_bit_position (field)
1657 + tree_low_cst (DECL_SIZE (field), 0)
1658 + 63) / 8 / 8; i++)
1659 classes[i] =
1660 merge_classes (X86_64_INTEGER_CLASS,
1661 classes[i]);
1663 else
1665 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1666 TREE_TYPE (field), subclasses,
1667 (int_bit_position (field)
1668 + bit_offset) % 256);
1669 if (!num)
1670 return 0;
1671 for (i = 0; i < num; i++)
1673 int pos =
1674 (int_bit_position (field) + bit_offset) / 8 / 8;
1675 classes[i + pos] =
1676 merge_classes (subclasses[i], classes[i + pos]);
1682 /* Arrays are handled as small records. */
1683 else if (TREE_CODE (type) == ARRAY_TYPE)
1685 int num;
1686 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1687 TREE_TYPE (type), subclasses, bit_offset);
1688 if (!num)
1689 return 0;
1691 /* The partial classes are now full classes. */
1692 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1693 subclasses[0] = X86_64_SSE_CLASS;
1694 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1695 subclasses[0] = X86_64_INTEGER_CLASS;
1697 for (i = 0; i < words; i++)
1698 classes[i] = subclasses[i % num];
1700 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1701 else if (TREE_CODE (type) == UNION_TYPE
1702 || TREE_CODE (type) == QUAL_UNION_TYPE)
1704 /* For C++ classes, first merge in the fields of the base classes. */
1705 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1707 tree bases = TYPE_BINFO_BASETYPES (type);
1708 int n_bases = TREE_VEC_LENGTH (bases);
1709 int i;
1711 for (i = 0; i < n_bases; ++i)
1713 tree binfo = TREE_VEC_ELT (bases, i);
1714 int num;
1715 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1716 tree type = BINFO_TYPE (binfo);
1718 num = classify_argument (TYPE_MODE (type),
1719 type, subclasses,
1720 (offset + bit_offset) % 256);
1721 if (!num)
1722 return 0;
1723 for (i = 0; i < num; i++)
1725 int pos = (offset + bit_offset) / 8 / 8;
1726 classes[i + pos] =
1727 merge_classes (subclasses[i], classes[i + pos]);
1731 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1733 if (TREE_CODE (field) == FIELD_DECL)
1735 int num;
1736 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1737 TREE_TYPE (field), subclasses,
1738 bit_offset);
1739 if (!num)
1740 return 0;
1741 for (i = 0; i < num; i++)
1742 classes[i] = merge_classes (subclasses[i], classes[i]);
1746 else
1747 abort ();
1749 /* Final merger cleanup. */
1750 for (i = 0; i < words; i++)
1752 /* If one class is MEMORY, everything should be passed in
1753 memory. */
1754 if (classes[i] == X86_64_MEMORY_CLASS)
1755 return 0;
1757 /* The X86_64_SSEUP_CLASS should be always preceded by
1758 X86_64_SSE_CLASS. */
1759 if (classes[i] == X86_64_SSEUP_CLASS
1760 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1761 classes[i] = X86_64_SSE_CLASS;
1763 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1764 if (classes[i] == X86_64_X87UP_CLASS
1765 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1766 classes[i] = X86_64_SSE_CLASS;
1768 return words;
1771 /* Compute the alignment needed. We align all types to their natural
1772 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
1773 if (mode != VOIDmode && mode != BLKmode)
1775 int mode_alignment = GET_MODE_BITSIZE (mode);
1777 if (mode == XFmode)
1778 mode_alignment = 128;
1779 else if (mode == XCmode)
1780 mode_alignment = 256;
1781 /* Misaligned fields are always returned in memory. */
1782 if (bit_offset % mode_alignment)
1783 return 0;
1786 /* Classification of atomic types. */
1787 switch (mode)
1789 case DImode:
1790 case SImode:
1791 case HImode:
1792 case QImode:
1793 case CSImode:
1794 case CHImode:
1795 case CQImode:
1796 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1797 classes[0] = X86_64_INTEGERSI_CLASS;
1798 else
1799 classes[0] = X86_64_INTEGER_CLASS;
1800 return 1;
1801 case CDImode:
1802 case TImode:
1803 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1804 return 2;
1805 case CTImode:
1806 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1807 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1808 return 4;
1809 case SFmode:
1810 if (!(bit_offset % 64))
1811 classes[0] = X86_64_SSESF_CLASS;
1812 else
1813 classes[0] = X86_64_SSE_CLASS;
1814 return 1;
1815 case DFmode:
1816 classes[0] = X86_64_SSEDF_CLASS;
1817 return 1;
1818 case TFmode:
1819 classes[0] = X86_64_X87_CLASS;
1820 classes[1] = X86_64_X87UP_CLASS;
1821 return 2;
1822 case TCmode:
1823 classes[0] = X86_64_X87_CLASS;
1824 classes[1] = X86_64_X87UP_CLASS;
1825 classes[2] = X86_64_X87_CLASS;
1826 classes[3] = X86_64_X87UP_CLASS;
1827 return 4;
1828 case DCmode:
1829 classes[0] = X86_64_SSEDF_CLASS;
1830 classes[1] = X86_64_SSEDF_CLASS;
1831 return 2;
1832 case SCmode:
1833 classes[0] = X86_64_SSE_CLASS;
1834 return 1;
1835 case V4SFmode:
1836 case V4SImode:
1837 classes[0] = X86_64_SSE_CLASS;
1838 classes[1] = X86_64_SSEUP_CLASS;
1839 return 2;
1840 case V2SFmode:
1841 case V2SImode:
1842 case V4HImode:
1843 case V8QImode:
1844 classes[0] = X86_64_SSE_CLASS;
1845 return 1;
1846 case BLKmode:
1847 case VOIDmode:
1848 return 0;
1849 default:
1850 abort ();
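/* Some sample classifications of atomic modes, added for illustration only:
     SImode at bit offset 0  -> INTEGERSI             1 eightbyte
     DFmode                  -> SSEDF                 1 eightbyte
     TImode                  -> INTEGER, INTEGER      2 eightbytes
     TFmode                  -> X87, X87UP            2 eightbytes  */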
1854 /* Examine the argument and set the number of registers required in each
1855 class. Return 0 iff the parameter should be passed in memory. */
1856 static int
1857 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1858 enum machine_mode mode;
1859 tree type;
1860 int *int_nregs, *sse_nregs;
1861 int in_return;
1863 enum x86_64_reg_class class[MAX_CLASSES];
1864 int n = classify_argument (mode, type, class, 0);
1866 *int_nregs = 0;
1867 *sse_nregs = 0;
1868 if (!n)
1869 return 0;
1870 for (n--; n >= 0; n--)
1871 switch (class[n])
1873 case X86_64_INTEGER_CLASS:
1874 case X86_64_INTEGERSI_CLASS:
1875 (*int_nregs)++;
1876 break;
1877 case X86_64_SSE_CLASS:
1878 case X86_64_SSESF_CLASS:
1879 case X86_64_SSEDF_CLASS:
1880 (*sse_nregs)++;
1881 break;
1882 case X86_64_NO_CLASS:
1883 case X86_64_SSEUP_CLASS:
1884 break;
1885 case X86_64_X87_CLASS:
1886 case X86_64_X87UP_CLASS:
1887 if (!in_return)
1888 return 0;
1889 break;
1890 case X86_64_MEMORY_CLASS:
1891 abort ();
1893 return 1;
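/* For illustration (not in the original source): for a structure classified
   as { INTEGER, SSEDF }, examine_argument sets *int_nregs = 1 and
   *sse_nregs = 1 and returns 1; for a TFmode argument (X87 + X87UP) it
   returns 0 unless IN_RETURN is set, forcing the value into memory.  */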
1895 /* Construct container for the argument used by GCC interface. See
1896 FUNCTION_ARG for the detailed description. */
1897 static rtx
1898 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1899 enum machine_mode mode;
1900 tree type;
1901 int in_return;
1902 int nintregs, nsseregs;
1903 const int * intreg;
1904 int sse_regno;
1906 enum machine_mode tmpmode;
1907 int bytes =
1908 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1909 enum x86_64_reg_class class[MAX_CLASSES];
1910 int n;
1911 int i;
1912 int nexps = 0;
1913 int needed_sseregs, needed_intregs;
1914 rtx exp[MAX_CLASSES];
1915 rtx ret;
1917 n = classify_argument (mode, type, class, 0);
1918 if (TARGET_DEBUG_ARG)
1920 if (!n)
1921 fprintf (stderr, "Memory class\n");
1922 else
1924 fprintf (stderr, "Classes:");
1925 for (i = 0; i < n; i++)
1927 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1929 fprintf (stderr, "\n");
1932 if (!n)
1933 return NULL;
1934 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1935 return NULL;
1936 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1937 return NULL;
1939 /* First construct simple cases. Avoid SCmode, since we want to use
1940 single register to pass this type. */
1941 if (n == 1 && mode != SCmode)
1942 switch (class[0])
1944 case X86_64_INTEGER_CLASS:
1945 case X86_64_INTEGERSI_CLASS:
1946 return gen_rtx_REG (mode, intreg[0]);
1947 case X86_64_SSE_CLASS:
1948 case X86_64_SSESF_CLASS:
1949 case X86_64_SSEDF_CLASS:
1950 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1951 case X86_64_X87_CLASS:
1952 return gen_rtx_REG (mode, FIRST_STACK_REG);
1953 case X86_64_NO_CLASS:
1954 /* Zero sized array, struct or class. */
1955 return NULL;
1956 default:
1957 abort ();
1959 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1960 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1961 if (n == 2
1962 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1963 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1964 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1965 && class[1] == X86_64_INTEGER_CLASS
1966 && (mode == CDImode || mode == TImode)
1967 && intreg[0] + 1 == intreg[1])
1968 return gen_rtx_REG (mode, intreg[0]);
1969 if (n == 4
1970 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1971 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1972 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1974 /* Otherwise figure out the entries of the PARALLEL. */
1975 for (i = 0; i < n; i++)
1977 switch (class[i])
1979 case X86_64_NO_CLASS:
1980 break;
1981 case X86_64_INTEGER_CLASS:
1982 case X86_64_INTEGERSI_CLASS:
1983 /* Merge TImodes on aligned occasions here too. */
1984 if (i * 8 + 8 > bytes)
1985 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1986 else if (class[i] == X86_64_INTEGERSI_CLASS)
1987 tmpmode = SImode;
1988 else
1989 tmpmode = DImode;
1990 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1991 if (tmpmode == BLKmode)
1992 tmpmode = DImode;
1993 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1994 gen_rtx_REG (tmpmode, *intreg),
1995 GEN_INT (i*8));
1996 intreg++;
1997 break;
1998 case X86_64_SSESF_CLASS:
1999 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2000 gen_rtx_REG (SFmode,
2001 SSE_REGNO (sse_regno)),
2002 GEN_INT (i*8));
2003 sse_regno++;
2004 break;
2005 case X86_64_SSEDF_CLASS:
2006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2007 gen_rtx_REG (DFmode,
2008 SSE_REGNO (sse_regno)),
2009 GEN_INT (i*8));
2010 sse_regno++;
2011 break;
2012 case X86_64_SSE_CLASS:
2013 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2014 tmpmode = TImode, i++;
2015 else
2016 tmpmode = DImode;
2017 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2018 gen_rtx_REG (tmpmode,
2019 SSE_REGNO (sse_regno)),
2020 GEN_INT (i*8));
2021 sse_regno++;
2022 break;
2023 default:
2024 abort ();
2027 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2028 for (i = 0; i < nexps; i++)
2029 XVECEXP (ret, 0, i) = exp [i];
2030 return ret;
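/* A sketch of the result, added for illustration only.  For a structure
   classified as { INTEGER, SSEDF }, with INTREG pointing at the first free
   integer argument register and SSE_REGNO at the first free SSE register,
   the returned PARALLEL looks roughly like

       (parallel [(expr_list (reg:DI <intreg>) (const_int 0))
                  (expr_list (reg:DF <xmmN>)   (const_int 8))])

   i.e. the first eightbyte lives in a general purpose register and the
   second in an SSE register, each tagged with its byte offset.  */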
2033 /* Update the data in CUM to advance over an argument
2034 of mode MODE and data type TYPE.
2035 (TYPE is null for libcalls where that information may not be available.) */
2037 void
2038 function_arg_advance (cum, mode, type, named)
2039 CUMULATIVE_ARGS *cum; /* current arg information */
2040 enum machine_mode mode; /* current arg mode */
2041 tree type; /* type of the argument or 0 if lib support */
2042 int named; /* whether or not the argument was named */
2044 int bytes =
2045 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2046 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2048 if (TARGET_DEBUG_ARG)
2049 fprintf (stderr,
2050 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2051 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2052 if (TARGET_64BIT)
2054 int int_nregs, sse_nregs;
2055 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2056 cum->words += words;
2057 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2059 cum->nregs -= int_nregs;
2060 cum->sse_nregs -= sse_nregs;
2061 cum->regno += int_nregs;
2062 cum->sse_regno += sse_nregs;
2064 else
2065 cum->words += words;
2067 else
2069 if (TARGET_SSE && mode == TImode)
2071 cum->sse_words += words;
2072 cum->sse_nregs -= 1;
2073 cum->sse_regno += 1;
2074 if (cum->sse_nregs <= 0)
2076 cum->sse_nregs = 0;
2077 cum->sse_regno = 0;
2080 else
2082 cum->words += words;
2083 cum->nregs -= words;
2084 cum->regno += words;
2086 if (cum->nregs <= 0)
2088 cum->nregs = 0;
2089 cum->regno = 0;
2093 return;
2096 /* Define where to put the arguments to a function.
2097 Value is zero to push the argument on the stack,
2098 or a hard register in which to store the argument.
2100 MODE is the argument's machine mode.
2101 TYPE is the data type of the argument (as a tree).
2102 This is null for libcalls where that information may
2103 not be available.
2104 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2105 the preceding args and about the function being called.
2106 NAMED is nonzero if this argument is a named parameter
2107 (otherwise it is an extra parameter matching an ellipsis). */
2110 function_arg (cum, mode, type, named)
2111 CUMULATIVE_ARGS *cum; /* current arg information */
2112 enum machine_mode mode; /* current arg mode */
2113 tree type; /* type of the argument or 0 if lib support */
2114 int named; /* != 0 for normal args, == 0 for ... args */
2116 rtx ret = NULL_RTX;
2117 int bytes =
2118 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2119 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2121 /* Handle a hidden AL argument containing the number of SSE registers used
2122 for varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2123 avoid any AL settings. */
2124 if (mode == VOIDmode)
2126 if (TARGET_64BIT)
2127 return GEN_INT (cum->maybe_vaarg
2128 ? (cum->sse_nregs < 0
2129 ? SSE_REGPARM_MAX
2130 : cum->sse_regno)
2131 : -1);
2132 else
2133 return constm1_rtx;
2135 if (TARGET_64BIT)
2136 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2137 &x86_64_int_parameter_registers [cum->regno],
2138 cum->sse_regno);
2139 else
2140 switch (mode)
2142 /* For now, pass fp/complex values on the stack. */
2143 default:
2144 break;
2146 case BLKmode:
2147 case DImode:
2148 case SImode:
2149 case HImode:
2150 case QImode:
2151 if (words <= cum->nregs)
2152 ret = gen_rtx_REG (mode, cum->regno);
2153 break;
2154 case TImode:
2155 if (cum->sse_nregs)
2156 ret = gen_rtx_REG (mode, cum->sse_regno);
2157 break;
2160 if (TARGET_DEBUG_ARG)
2162 fprintf (stderr,
2163 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2164 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2166 if (ret)
2167 print_simple_rtl (stderr, ret);
2168 else
2169 fprintf (stderr, ", stack");
2171 fprintf (stderr, " )\n");
2174 return ret;
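/* Typical usage, for illustration only: the middle end calls FUNCTION_ARG and
   then FUNCTION_ARG_ADVANCE for each parameter in turn.  On x86-64 a plain
   int therefore lands in the next free register of the rdi, rsi, rdx, rcx,
   r8, r9 sequence and a double in the next free xmm register; once
   construct_container returns NULL the argument goes on the stack.  */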
2177 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2178 and type. */
2181 ix86_function_arg_boundary (mode, type)
2182 enum machine_mode mode;
2183 tree type;
2185 int align;
2186 if (!TARGET_64BIT)
2187 return PARM_BOUNDARY;
2188 if (type)
2189 align = TYPE_ALIGN (type);
2190 else
2191 align = GET_MODE_ALIGNMENT (mode);
2192 if (align < PARM_BOUNDARY)
2193 align = PARM_BOUNDARY;
2194 if (align > 128)
2195 align = 128;
2196 return align;
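/* For illustration only: on x86-64 this yields 64 bits for int and double
   arguments (raised to PARM_BOUNDARY) and 128 bits for 16-byte aligned types
   such as __m128; anything more strongly aligned is still capped at 128.  */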
2199 /* Return true if N is a possible register number of function value. */
2200 bool
2201 ix86_function_value_regno_p (regno)
2202 int regno;
2204 if (!TARGET_64BIT)
2206 return ((regno) == 0
2207 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2208 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2210 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2211 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2212 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2215 /* Define how to find the value returned by a function.
2216 VALTYPE is the data type of the value (as a tree).
2217 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2218 otherwise, FUNC is 0. */
2220 ix86_function_value (valtype)
2221 tree valtype;
2223 if (TARGET_64BIT)
2225 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2226 REGPARM_MAX, SSE_REGPARM_MAX,
2227 x86_64_int_return_registers, 0);
2228 /* For zero sized structures, construct_container returns NULL, but we need
2229 to keep the rest of the compiler happy by returning a meaningful value. */
2230 if (!ret)
2231 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2232 return ret;
2234 else
2235 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2238 /* Return nonzero iff the type is returned in memory. */
2240 ix86_return_in_memory (type)
2241 tree type;
2243 int needed_intregs, needed_sseregs;
2244 if (TARGET_64BIT)
2246 return !examine_argument (TYPE_MODE (type), type, 1,
2247 &needed_intregs, &needed_sseregs);
2249 else
2251 if (TYPE_MODE (type) == BLKmode
2252 || (VECTOR_MODE_P (TYPE_MODE (type))
2253 && int_size_in_bytes (type) == 8)
2254 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2255 && TYPE_MODE (type) != TFmode
2256 && !VECTOR_MODE_P (TYPE_MODE (type))))
2257 return 1;
2258 return 0;
2262 /* Define how to find the value returned by a library function
2263 assuming the value has mode MODE. */
2265 ix86_libcall_value (mode)
2266 enum machine_mode mode;
2268 if (TARGET_64BIT)
2270 switch (mode)
2272 case SFmode:
2273 case SCmode:
2274 case DFmode:
2275 case DCmode:
2276 return gen_rtx_REG (mode, FIRST_SSE_REG);
2277 case TFmode:
2278 case TCmode:
2279 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2280 default:
2281 return gen_rtx_REG (mode, 0);
2284 else
2285 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2288 /* Create the va_list data type. */
2290 tree
2291 ix86_build_va_list ()
2293 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2295 /* For i386 we use plain pointer to argument area. */
2296 if (!TARGET_64BIT)
2297 return build_pointer_type (char_type_node);
2299 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2300 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2302 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2303 unsigned_type_node);
2304 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2305 unsigned_type_node);
2306 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2307 ptr_type_node);
2308 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2309 ptr_type_node);
2311 DECL_FIELD_CONTEXT (f_gpr) = record;
2312 DECL_FIELD_CONTEXT (f_fpr) = record;
2313 DECL_FIELD_CONTEXT (f_ovf) = record;
2314 DECL_FIELD_CONTEXT (f_sav) = record;
2316 TREE_CHAIN (record) = type_decl;
2317 TYPE_NAME (record) = type_decl;
2318 TYPE_FIELDS (record) = f_gpr;
2319 TREE_CHAIN (f_gpr) = f_fpr;
2320 TREE_CHAIN (f_fpr) = f_ovf;
2321 TREE_CHAIN (f_ovf) = f_sav;
2323 layout_type (record);
2325 /* The correct type is an array type of one element. */
2326 return build_array_type (record, build_index_type (size_zero_node));
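/* The record built above corresponds, roughly, to the following C
   declaration (shown for illustration; the field names match the FIELD_DECLs
   created above):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */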
2329 /* Perform any actions needed for a function that is receiving a
2330 variable number of arguments.
2332 CUM is as above.
2334 MODE and TYPE are the mode and type of the current parameter.
2336 PRETEND_SIZE is a variable that should be set to the amount of stack
2337 that must be pushed by the prolog to pretend that our caller pushed it.
2340 Normally, this macro will push all remaining incoming registers on the
2341 stack and set PRETEND_SIZE to the length of the registers pushed. */
2343 void
2344 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2345 CUMULATIVE_ARGS *cum;
2346 enum machine_mode mode;
2347 tree type;
2348 int *pretend_size ATTRIBUTE_UNUSED;
2349 int no_rtl;
2352 CUMULATIVE_ARGS next_cum;
2353 rtx save_area = NULL_RTX, mem;
2354 rtx label;
2355 rtx label_ref;
2356 rtx tmp_reg;
2357 rtx nsse_reg;
2358 int set;
2359 tree fntype;
2360 int stdarg_p;
2361 int i;
2363 if (!TARGET_64BIT)
2364 return;
2366 /* Indicate that space for the varargs save area should be allocated on the stack. */
2367 ix86_save_varrargs_registers = 1;
2369 fntype = TREE_TYPE (current_function_decl);
2370 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2371 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2372 != void_type_node));
2374 /* For varargs, we do not want to skip the dummy va_dcl argument.
2375 For stdargs, we do want to skip the last named argument. */
2376 next_cum = *cum;
2377 if (stdarg_p)
2378 function_arg_advance (&next_cum, mode, type, 1);
2380 if (!no_rtl)
2381 save_area = frame_pointer_rtx;
2383 set = get_varargs_alias_set ();
2385 for (i = next_cum.regno; i < ix86_regparm; i++)
2387 mem = gen_rtx_MEM (Pmode,
2388 plus_constant (save_area, i * UNITS_PER_WORD));
2389 set_mem_alias_set (mem, set);
2390 emit_move_insn (mem, gen_rtx_REG (Pmode,
2391 x86_64_int_parameter_registers[i]));
2394 if (next_cum.sse_nregs)
2396 /* Now emit code to save SSE registers. The AX parameter contains the number
2397 of SSE parameter registers used to call this function. We use the
2398 sse_prologue_save insn template, which produces a computed jump across
2399 the SSE saves. We need some preparation work to get this working. */
2401 label = gen_label_rtx ();
2402 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2404 /* Compute the address to jump to :
2405 label - eax*4 + nnamed_sse_arguments*4 */
2406 tmp_reg = gen_reg_rtx (Pmode);
2407 nsse_reg = gen_reg_rtx (Pmode);
2408 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2409 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2410 gen_rtx_MULT (Pmode, nsse_reg,
2411 GEN_INT (4))));
2412 if (next_cum.sse_regno)
2413 emit_move_insn
2414 (nsse_reg,
2415 gen_rtx_CONST (DImode,
2416 gen_rtx_PLUS (DImode,
2417 label_ref,
2418 GEN_INT (next_cum.sse_regno * 4))));
2419 else
2420 emit_move_insn (nsse_reg, label_ref);
2421 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2423 /* Compute the address of the memory block we save into. We always use a
2424 pointer pointing 127 bytes after the first byte to store - this is needed
2425 to keep the instruction size limited to 4 bytes. */
2426 tmp_reg = gen_reg_rtx (Pmode);
2427 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2428 plus_constant (save_area,
2429 8 * REGPARM_MAX + 127)));
2430 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2431 set_mem_alias_set (mem, set);
2432 set_mem_align (mem, BITS_PER_WORD);
2434 /* And finally do the dirty job! */
2435 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2436 GEN_INT (next_cum.sse_regno), label));
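/* Register save area layout produced above, for illustration (assuming the
   usual REGPARM_MAX of 6 and SSE_REGPARM_MAX of 8 for 64-bit):
     bytes   0 ..  47   integer argument registers, 8 bytes each
     bytes  48 .. 175   SSE argument registers, 16 bytes each
   gp_offset and fp_offset in the va_list index into this block.  */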
2441 /* Implement va_start. */
2443 void
2444 ix86_va_start (stdarg_p, valist, nextarg)
2445 int stdarg_p;
2446 tree valist;
2447 rtx nextarg;
2449 HOST_WIDE_INT words, n_gpr, n_fpr;
2450 tree f_gpr, f_fpr, f_ovf, f_sav;
2451 tree gpr, fpr, ovf, sav, t;
2453 /* Only 64bit target needs something special. */
2454 if (!TARGET_64BIT)
2456 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2457 return;
2460 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2461 f_fpr = TREE_CHAIN (f_gpr);
2462 f_ovf = TREE_CHAIN (f_fpr);
2463 f_sav = TREE_CHAIN (f_ovf);
2465 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2466 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2467 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2468 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2469 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2471 /* Count number of gp and fp argument registers used. */
2472 words = current_function_args_info.words;
2473 n_gpr = current_function_args_info.regno;
2474 n_fpr = current_function_args_info.sse_regno;
2476 if (TARGET_DEBUG_ARG)
2477 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2478 (int) words, (int) n_gpr, (int) n_fpr);
2480 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2481 build_int_2 (n_gpr * 8, 0));
2482 TREE_SIDE_EFFECTS (t) = 1;
2483 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2485 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2486 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2487 TREE_SIDE_EFFECTS (t) = 1;
2488 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2490 /* Find the overflow area. */
2491 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2492 if (words != 0)
2493 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2494 build_int_2 (words * UNITS_PER_WORD, 0));
2495 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2496 TREE_SIDE_EFFECTS (t) = 1;
2497 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2499 /* Find the register save area.
2500 The prologue of the function saves it right above the stack frame. */
2501 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2502 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2503 TREE_SIDE_EFFECTS (t) = 1;
2504 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
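/* Worked example, not in the original source: for

       void f (int a, double b, ...)

   va_start leaves gp_offset = 8 (one named integer register consumed),
   fp_offset = 8*REGPARM_MAX + 16 = 64, overflow_arg_area pointing at the
   first stack-passed argument, and reg_save_area at the block saved by the
   prologue.  */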
2507 /* Implement va_arg. */
2509 ix86_va_arg (valist, type)
2510 tree valist, type;
2512 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2513 tree f_gpr, f_fpr, f_ovf, f_sav;
2514 tree gpr, fpr, ovf, sav, t;
2515 int size, rsize;
2516 rtx lab_false, lab_over = NULL_RTX;
2517 rtx addr_rtx, r;
2518 rtx container;
2520 /* Only 64bit target needs something special. */
2521 if (!TARGET_64BIT)
2523 return std_expand_builtin_va_arg (valist, type);
2526 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2527 f_fpr = TREE_CHAIN (f_gpr);
2528 f_ovf = TREE_CHAIN (f_fpr);
2529 f_sav = TREE_CHAIN (f_ovf);
2531 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2532 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2533 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2534 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2535 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2537 size = int_size_in_bytes (type);
2538 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2540 container = construct_container (TYPE_MODE (type), type, 0,
2541 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2543 /* Pull the value out of the saved registers ... */
2546 addr_rtx = gen_reg_rtx (Pmode);
2548 if (container)
2550 rtx int_addr_rtx, sse_addr_rtx;
2551 int needed_intregs, needed_sseregs;
2552 int need_temp;
2554 lab_over = gen_label_rtx ();
2555 lab_false = gen_label_rtx ();
2557 examine_argument (TYPE_MODE (type), type, 0,
2558 &needed_intregs, &needed_sseregs);
2561 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2562 || TYPE_ALIGN (type) > 128);
2564 /* In case we are passing a structure, verify that it is a consecutive block
2565 on the register save area. If not, we need to do moves. */
2566 if (!need_temp && !REG_P (container))
2568 /* Verify that all registers are strictly consecutive. */
2569 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2571 int i;
2573 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2575 rtx slot = XVECEXP (container, 0, i);
2576 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2577 || INTVAL (XEXP (slot, 1)) != i * 16)
2578 need_temp = 1;
2581 else
2583 int i;
2585 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2587 rtx slot = XVECEXP (container, 0, i);
2588 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2589 || INTVAL (XEXP (slot, 1)) != i * 8)
2590 need_temp = 1;
2594 if (!need_temp)
2596 int_addr_rtx = addr_rtx;
2597 sse_addr_rtx = addr_rtx;
2599 else
2601 int_addr_rtx = gen_reg_rtx (Pmode);
2602 sse_addr_rtx = gen_reg_rtx (Pmode);
2604 /* First ensure that we fit completely in registers. */
2605 if (needed_intregs)
2607 emit_cmp_and_jump_insns (expand_expr
2608 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2609 GEN_INT ((REGPARM_MAX - needed_intregs +
2610 1) * 8), GE, const1_rtx, SImode,
2611 1, lab_false);
2613 if (needed_sseregs)
2615 emit_cmp_and_jump_insns (expand_expr
2616 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2617 GEN_INT ((SSE_REGPARM_MAX -
2618 needed_sseregs + 1) * 16 +
2619 REGPARM_MAX * 8), GE, const1_rtx,
2620 SImode, 1, lab_false);
2623 /* Compute index to start of area used for integer regs. */
2624 if (needed_intregs)
2626 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2627 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2628 if (r != int_addr_rtx)
2629 emit_move_insn (int_addr_rtx, r);
2631 if (needed_sseregs)
2633 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2634 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2635 if (r != sse_addr_rtx)
2636 emit_move_insn (sse_addr_rtx, r);
2638 if (need_temp)
2640 int i;
2641 rtx mem;
2643 /* Never use the memory itself, as it has the alias set. */
2644 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2645 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2646 set_mem_alias_set (mem, get_varargs_alias_set ());
2647 set_mem_align (mem, BITS_PER_UNIT);
2649 for (i = 0; i < XVECLEN (container, 0); i++)
2651 rtx slot = XVECEXP (container, 0, i);
2652 rtx reg = XEXP (slot, 0);
2653 enum machine_mode mode = GET_MODE (reg);
2654 rtx src_addr;
2655 rtx src_mem;
2656 int src_offset;
2657 rtx dest_mem;
2659 if (SSE_REGNO_P (REGNO (reg)))
2661 src_addr = sse_addr_rtx;
2662 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2664 else
2666 src_addr = int_addr_rtx;
2667 src_offset = REGNO (reg) * 8;
2669 src_mem = gen_rtx_MEM (mode, src_addr);
2670 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2671 src_mem = adjust_address (src_mem, mode, src_offset);
2672 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2673 emit_move_insn (dest_mem, src_mem);
2677 if (needed_intregs)
2680 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2681 build_int_2 (needed_intregs * 8, 0));
2682 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2683 TREE_SIDE_EFFECTS (t) = 1;
2684 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2686 if (needed_sseregs)
2689 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2690 build_int_2 (needed_sseregs * 16, 0));
2691 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2692 TREE_SIDE_EFFECTS (t) = 1;
2693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2696 emit_jump_insn (gen_jump (lab_over));
2697 emit_barrier ();
2698 emit_label (lab_false);
2701 /* ... otherwise out of the overflow area. */
2703 /* Care for on-stack alignment if needed. */
2704 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2705 t = ovf;
2706 else
2708 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2709 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2710 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2712 t = save_expr (t);
2714 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2715 if (r != addr_rtx)
2716 emit_move_insn (addr_rtx, r);
2719 build (PLUS_EXPR, TREE_TYPE (t), t,
2720 build_int_2 (rsize * UNITS_PER_WORD, 0));
2721 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2722 TREE_SIDE_EFFECTS (t) = 1;
2723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2725 if (container)
2726 emit_label (lab_over);
2728 return addr_rtx;
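/* The code above implements, roughly, the following C sketch (illustration
   only; the constants assume REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

       if (gp_offset + needed_intregs * 8 <= 48
           && fp_offset + needed_sseregs * 16 <= 176)
         {
           /* Fetch from the register save area.  */
           addr = reg_save_area + gp_offset (or fp_offset);
           gp_offset += needed_intregs * 8;
           fp_offset += needed_sseregs * 16;
         }
       else
         {
           /* Fetch from the overflow (stack) area.  */
           addr = align (overflow_arg_area);
           overflow_arg_area += rsize * UNITS_PER_WORD;
         }
       return addr;  */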
2731 /* Return nonzero if OP is general operand representable on x86_64. */
2734 x86_64_general_operand (op, mode)
2735 rtx op;
2736 enum machine_mode mode;
2738 if (!TARGET_64BIT)
2739 return general_operand (op, mode);
2740 if (nonimmediate_operand (op, mode))
2741 return 1;
2742 return x86_64_sign_extended_value (op);
2745 /* Return nonzero if OP is general operand representable on x86_64
2746 as either sign extended or zero extended constant. */
2749 x86_64_szext_general_operand (op, mode)
2750 rtx op;
2751 enum machine_mode mode;
2753 if (!TARGET_64BIT)
2754 return general_operand (op, mode);
2755 if (nonimmediate_operand (op, mode))
2756 return 1;
2757 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2760 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2763 x86_64_nonmemory_operand (op, mode)
2764 rtx op;
2765 enum machine_mode mode;
2767 if (!TARGET_64BIT)
2768 return nonmemory_operand (op, mode);
2769 if (register_operand (op, mode))
2770 return 1;
2771 return x86_64_sign_extended_value (op);
2774 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2777 x86_64_movabs_operand (op, mode)
2778 rtx op;
2779 enum machine_mode mode;
2781 if (!TARGET_64BIT || !flag_pic)
2782 return nonmemory_operand (op, mode);
2783 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2784 return 1;
2785 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2786 return 1;
2787 return 0;
2790 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2793 x86_64_szext_nonmemory_operand (op, mode)
2794 rtx op;
2795 enum machine_mode mode;
2797 if (!TARGET_64BIT)
2798 return nonmemory_operand (op, mode);
2799 if (register_operand (op, mode))
2800 return 1;
2801 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2804 /* Return nonzero if OP is immediate operand representable on x86_64. */
2807 x86_64_immediate_operand (op, mode)
2808 rtx op;
2809 enum machine_mode mode;
2811 if (!TARGET_64BIT)
2812 return immediate_operand (op, mode);
2813 return x86_64_sign_extended_value (op);
2816 /* Return nonzero if OP is immediate operand representable on x86_64. */
2819 x86_64_zext_immediate_operand (op, mode)
2820 rtx op;
2821 enum machine_mode mode ATTRIBUTE_UNUSED;
2823 return x86_64_zero_extended_value (op);
2826 /* Return nonzero if OP is (const_int 1), else return zero. */
2829 const_int_1_operand (op, mode)
2830 rtx op;
2831 enum machine_mode mode ATTRIBUTE_UNUSED;
2833 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2836 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2837 reference and a constant. */
2840 symbolic_operand (op, mode)
2841 register rtx op;
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2844 switch (GET_CODE (op))
2846 case SYMBOL_REF:
2847 case LABEL_REF:
2848 return 1;
2850 case CONST:
2851 op = XEXP (op, 0);
2852 if (GET_CODE (op) == SYMBOL_REF
2853 || GET_CODE (op) == LABEL_REF
2854 || (GET_CODE (op) == UNSPEC
2855 && (XINT (op, 1) == UNSPEC_GOT
2856 || XINT (op, 1) == UNSPEC_GOTOFF
2857 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2858 return 1;
2859 if (GET_CODE (op) != PLUS
2860 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2861 return 0;
2863 op = XEXP (op, 0);
2864 if (GET_CODE (op) == SYMBOL_REF
2865 || GET_CODE (op) == LABEL_REF)
2866 return 1;
2867 /* Only @GOTOFF gets offsets. */
2868 if (GET_CODE (op) != UNSPEC
2869 || XINT (op, 1) != UNSPEC_GOTOFF)
2870 return 0;
2872 op = XVECEXP (op, 0, 0);
2873 if (GET_CODE (op) == SYMBOL_REF
2874 || GET_CODE (op) == LABEL_REF)
2875 return 1;
2876 return 0;
2878 default:
2879 return 0;
2883 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2886 pic_symbolic_operand (op, mode)
2887 register rtx op;
2888 enum machine_mode mode ATTRIBUTE_UNUSED;
2890 if (GET_CODE (op) != CONST)
2891 return 0;
2892 op = XEXP (op, 0);
2893 if (TARGET_64BIT)
2895 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2896 return 1;
2898 else
2900 if (GET_CODE (op) == UNSPEC)
2901 return 1;
2902 if (GET_CODE (op) != PLUS
2903 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2904 return 0;
2905 op = XEXP (op, 0);
2906 if (GET_CODE (op) == UNSPEC)
2907 return 1;
2909 return 0;
2912 /* Return true if OP is a symbolic operand that resolves locally. */
2914 static int
2915 local_symbolic_operand (op, mode)
2916 rtx op;
2917 enum machine_mode mode ATTRIBUTE_UNUSED;
2919 if (GET_CODE (op) == LABEL_REF)
2920 return 1;
2922 if (GET_CODE (op) == CONST
2923 && GET_CODE (XEXP (op, 0)) == PLUS
2924 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2925 op = XEXP (XEXP (op, 0), 0);
2927 if (GET_CODE (op) != SYMBOL_REF)
2928 return 0;
2930 /* These we've been told are local by varasm and encode_section_info
2931 respectively. */
2932 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2933 return 1;
2935 /* There is, however, a not insubstantial body of code in the rest of
2936 the compiler that assumes it can just stick the results of
2937 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2938 /* ??? This is a hack. Should update the body of the compiler to
2939 always create a DECL and invoke targetm.encode_section_info. */
2940 if (strncmp (XSTR (op, 0), internal_label_prefix,
2941 internal_label_prefix_len) == 0)
2942 return 1;
2944 return 0;
2947 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2950 tls_symbolic_operand (op, mode)
2951 register rtx op;
2952 enum machine_mode mode ATTRIBUTE_UNUSED;
2954 const char *symbol_str;
2956 if (GET_CODE (op) != SYMBOL_REF)
2957 return 0;
2958 symbol_str = XSTR (op, 0);
2960 if (symbol_str[0] != '%')
2961 return 0;
2962 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2965 static int
2966 tls_symbolic_operand_1 (op, kind)
2967 rtx op;
2968 enum tls_model kind;
2970 const char *symbol_str;
2972 if (GET_CODE (op) != SYMBOL_REF)
2973 return 0;
2974 symbol_str = XSTR (op, 0);
2976 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
2980 global_dynamic_symbolic_operand (op, mode)
2981 register rtx op;
2982 enum machine_mode mode ATTRIBUTE_UNUSED;
2984 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
2988 local_dynamic_symbolic_operand (op, mode)
2989 register rtx op;
2990 enum machine_mode mode ATTRIBUTE_UNUSED;
2992 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
2996 initial_exec_symbolic_operand (op, mode)
2997 register rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
3000 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3004 local_exec_symbolic_operand (op, mode)
3005 register rtx op;
3006 enum machine_mode mode ATTRIBUTE_UNUSED;
3008 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3011 /* Test for a valid operand for a call instruction. Don't allow the
3012 arg pointer register or virtual regs since they may decay into
3013 reg + const, which the patterns can't handle. */
3016 call_insn_operand (op, mode)
3017 rtx op;
3018 enum machine_mode mode ATTRIBUTE_UNUSED;
3020 /* Disallow indirect through a virtual register. This leads to
3021 compiler aborts when trying to eliminate them. */
3022 if (GET_CODE (op) == REG
3023 && (op == arg_pointer_rtx
3024 || op == frame_pointer_rtx
3025 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3026 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3027 return 0;
3029 /* Disallow `call 1234'. Due to varying assembler lameness this
3030 gets either rejected or translated to `call .+1234'. */
3031 if (GET_CODE (op) == CONST_INT)
3032 return 0;
3034 /* Explicitly allow SYMBOL_REF even if pic. */
3035 if (GET_CODE (op) == SYMBOL_REF)
3036 return 1;
3038 /* Otherwise we can allow any general_operand in the address. */
3039 return general_operand (op, Pmode);
3043 constant_call_address_operand (op, mode)
3044 rtx op;
3045 enum machine_mode mode ATTRIBUTE_UNUSED;
3047 if (GET_CODE (op) == CONST
3048 && GET_CODE (XEXP (op, 0)) == PLUS
3049 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3050 op = XEXP (XEXP (op, 0), 0);
3051 return GET_CODE (op) == SYMBOL_REF;
3054 /* Match exactly zero and one. */
3057 const0_operand (op, mode)
3058 register rtx op;
3059 enum machine_mode mode;
3061 return op == CONST0_RTX (mode);
3065 const1_operand (op, mode)
3066 register rtx op;
3067 enum machine_mode mode ATTRIBUTE_UNUSED;
3069 return op == const1_rtx;
3072 /* Match 2, 4, or 8. Used for leal multiplicands. */
3075 const248_operand (op, mode)
3076 register rtx op;
3077 enum machine_mode mode ATTRIBUTE_UNUSED;
3079 return (GET_CODE (op) == CONST_INT
3080 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3083 /* True if this is a constant appropriate for an increment or decrement. */
3086 incdec_operand (op, mode)
3087 register rtx op;
3088 enum machine_mode mode ATTRIBUTE_UNUSED;
3090 /* On Pentium 4, the inc and dec operations cause an extra dependency on the
3091 flags register, since the carry flag is not set. */
3092 if (TARGET_PENTIUM4 && !optimize_size)
3093 return 0;
3094 return op == const1_rtx || op == constm1_rtx;
3097 /* Return nonzero if OP is acceptable as operand of DImode shift
3098 expander. */
3101 shiftdi_operand (op, mode)
3102 rtx op;
3103 enum machine_mode mode ATTRIBUTE_UNUSED;
3105 if (TARGET_64BIT)
3106 return nonimmediate_operand (op, mode);
3107 else
3108 return register_operand (op, mode);
3111 /* Return false if this is the stack pointer, or any other fake
3112 register eliminable to the stack pointer. Otherwise, this is
3113 a register operand.
3115 This is used to prevent esp from being used as an index reg,
3116 which would only happen in pathological cases. */
3119 reg_no_sp_operand (op, mode)
3120 register rtx op;
3121 enum machine_mode mode;
3123 rtx t = op;
3124 if (GET_CODE (t) == SUBREG)
3125 t = SUBREG_REG (t);
3126 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3127 return 0;
3129 return register_operand (op, mode);
3133 mmx_reg_operand (op, mode)
3134 register rtx op;
3135 enum machine_mode mode ATTRIBUTE_UNUSED;
3137 return MMX_REG_P (op);
3140 /* Return false if this is any eliminable register. Otherwise
3141 general_operand. */
3144 general_no_elim_operand (op, mode)
3145 register rtx op;
3146 enum machine_mode mode;
3148 rtx t = op;
3149 if (GET_CODE (t) == SUBREG)
3150 t = SUBREG_REG (t);
3151 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3152 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3153 || t == virtual_stack_dynamic_rtx)
3154 return 0;
3155 if (REG_P (t)
3156 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3157 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3158 return 0;
3160 return general_operand (op, mode);
3163 /* Return false if this is any eliminable register. Otherwise
3164 register_operand or const_int. */
3167 nonmemory_no_elim_operand (op, mode)
3168 register rtx op;
3169 enum machine_mode mode;
3171 rtx t = op;
3172 if (GET_CODE (t) == SUBREG)
3173 t = SUBREG_REG (t);
3174 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3175 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3176 || t == virtual_stack_dynamic_rtx)
3177 return 0;
3179 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3182 /* Return true if op is a Q_REGS class register. */
3185 q_regs_operand (op, mode)
3186 register rtx op;
3187 enum machine_mode mode;
3189 if (mode != VOIDmode && GET_MODE (op) != mode)
3190 return 0;
3191 if (GET_CODE (op) == SUBREG)
3192 op = SUBREG_REG (op);
3193 return ANY_QI_REG_P (op);
3196 /* Return true if op is a NON_Q_REGS class register. */
3199 non_q_regs_operand (op, mode)
3200 register rtx op;
3201 enum machine_mode mode;
3203 if (mode != VOIDmode && GET_MODE (op) != mode)
3204 return 0;
3205 if (GET_CODE (op) == SUBREG)
3206 op = SUBREG_REG (op);
3207 return NON_QI_REG_P (op);
3210 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3211 insns. */
3213 sse_comparison_operator (op, mode)
3214 rtx op;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3217 enum rtx_code code = GET_CODE (op);
3218 switch (code)
3220 /* Operations supported directly. */
3221 case EQ:
3222 case LT:
3223 case LE:
3224 case UNORDERED:
3225 case NE:
3226 case UNGE:
3227 case UNGT:
3228 case ORDERED:
3229 return 1;
3230 /* These are equivalent to ones above in non-IEEE comparisons. */
3231 case UNEQ:
3232 case UNLT:
3233 case UNLE:
3234 case LTGT:
3235 case GE:
3236 case GT:
3237 return !TARGET_IEEE_FP;
3238 default:
3239 return 0;
3242 /* Return 1 if OP is a valid comparison operator in valid mode. */
3244 ix86_comparison_operator (op, mode)
3245 register rtx op;
3246 enum machine_mode mode;
3248 enum machine_mode inmode;
3249 enum rtx_code code = GET_CODE (op);
3250 if (mode != VOIDmode && GET_MODE (op) != mode)
3251 return 0;
3252 if (GET_RTX_CLASS (code) != '<')
3253 return 0;
3254 inmode = GET_MODE (XEXP (op, 0));
3256 if (inmode == CCFPmode || inmode == CCFPUmode)
3258 enum rtx_code second_code, bypass_code;
3259 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3260 return (bypass_code == NIL && second_code == NIL);
3262 switch (code)
3264 case EQ: case NE:
3265 return 1;
3266 case LT: case GE:
3267 if (inmode == CCmode || inmode == CCGCmode
3268 || inmode == CCGOCmode || inmode == CCNOmode)
3269 return 1;
3270 return 0;
3271 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3272 if (inmode == CCmode)
3273 return 1;
3274 return 0;
3275 case GT: case LE:
3276 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3277 return 1;
3278 return 0;
3279 default:
3280 return 0;
3284 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3287 fcmov_comparison_operator (op, mode)
3288 register rtx op;
3289 enum machine_mode mode;
3291 enum machine_mode inmode;
3292 enum rtx_code code = GET_CODE (op);
3293 if (mode != VOIDmode && GET_MODE (op) != mode)
3294 return 0;
3295 if (GET_RTX_CLASS (code) != '<')
3296 return 0;
3297 inmode = GET_MODE (XEXP (op, 0));
3298 if (inmode == CCFPmode || inmode == CCFPUmode)
3300 enum rtx_code second_code, bypass_code;
3301 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3302 if (bypass_code != NIL || second_code != NIL)
3303 return 0;
3304 code = ix86_fp_compare_code_to_integer (code);
3306 /* The i387 supports just a limited set of condition codes. */
3307 switch (code)
3309 case LTU: case GTU: case LEU: case GEU:
3310 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3311 return 1;
3312 return 0;
3313 case ORDERED: case UNORDERED:
3314 case EQ: case NE:
3315 return 1;
3316 default:
3317 return 0;
3321 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3324 promotable_binary_operator (op, mode)
3325 register rtx op;
3326 enum machine_mode mode ATTRIBUTE_UNUSED;
3328 switch (GET_CODE (op))
3330 case MULT:
3331 /* Modern CPUs have same latency for HImode and SImode multiply,
3332 but 386 and 486 do HImode multiply faster. */
3333 return ix86_cpu > PROCESSOR_I486;
3334 case PLUS:
3335 case AND:
3336 case IOR:
3337 case XOR:
3338 case ASHIFT:
3339 return 1;
3340 default:
3341 return 0;
3345 /* Nearly general operand, but accept any const_double, since we wish
3346 to be able to drop them into memory rather than have them get pulled
3347 into registers. */
3350 cmp_fp_expander_operand (op, mode)
3351 register rtx op;
3352 enum machine_mode mode;
3354 if (mode != VOIDmode && mode != GET_MODE (op))
3355 return 0;
3356 if (GET_CODE (op) == CONST_DOUBLE)
3357 return 1;
3358 return general_operand (op, mode);
3361 /* Match an SI or HImode register for a zero_extract. */
3364 ext_register_operand (op, mode)
3365 register rtx op;
3366 enum machine_mode mode ATTRIBUTE_UNUSED;
3368 int regno;
3369 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3370 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3371 return 0;
3373 if (!register_operand (op, VOIDmode))
3374 return 0;
3376 /* Be careful to accept only registers having upper parts. */
3377 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3378 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3381 /* Return 1 if this is a valid binary floating-point operation.
3382 OP is the expression matched, and MODE is its mode. */
3385 binary_fp_operator (op, mode)
3386 register rtx op;
3387 enum machine_mode mode;
3389 if (mode != VOIDmode && mode != GET_MODE (op))
3390 return 0;
3392 switch (GET_CODE (op))
3394 case PLUS:
3395 case MINUS:
3396 case MULT:
3397 case DIV:
3398 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3400 default:
3401 return 0;
3406 mult_operator (op, mode)
3407 register rtx op;
3408 enum machine_mode mode ATTRIBUTE_UNUSED;
3410 return GET_CODE (op) == MULT;
3414 div_operator (op, mode)
3415 register rtx op;
3416 enum machine_mode mode ATTRIBUTE_UNUSED;
3418 return GET_CODE (op) == DIV;
3422 arith_or_logical_operator (op, mode)
3423 rtx op;
3424 enum machine_mode mode;
3426 return ((mode == VOIDmode || GET_MODE (op) == mode)
3427 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3428 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3431 /* Returns 1 if OP is memory operand with a displacement. */
3434 memory_displacement_operand (op, mode)
3435 register rtx op;
3436 enum machine_mode mode;
3438 struct ix86_address parts;
3440 if (! memory_operand (op, mode))
3441 return 0;
3443 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3444 abort ();
3446 return parts.disp != NULL_RTX;
3449 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3450 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3452 ??? It seems likely that this will only work because cmpsi is an
3453 expander, and no actual insns use this. */
3456 cmpsi_operand (op, mode)
3457 rtx op;
3458 enum machine_mode mode;
3460 if (nonimmediate_operand (op, mode))
3461 return 1;
3463 if (GET_CODE (op) == AND
3464 && GET_MODE (op) == SImode
3465 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3466 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3467 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3468 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3469 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3470 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3471 return 1;
3473 return 0;
3476 /* Returns 1 if OP is memory operand that can not be represented by the
3477 modRM array. */
3480 long_memory_operand (op, mode)
3481 register rtx op;
3482 enum machine_mode mode;
3484 if (! memory_operand (op, mode))
3485 return 0;
3487 return memory_address_length (op) != 0;
3490 /* Return nonzero if the rtx is known aligned. */
3493 aligned_operand (op, mode)
3494 rtx op;
3495 enum machine_mode mode;
3497 struct ix86_address parts;
3499 if (!general_operand (op, mode))
3500 return 0;
3502 /* Registers and immediate operands are always "aligned". */
3503 if (GET_CODE (op) != MEM)
3504 return 1;
3506 /* Don't even try to do any aligned optimizations with volatiles. */
3507 if (MEM_VOLATILE_P (op))
3508 return 0;
3510 op = XEXP (op, 0);
3512 /* Pushes and pops are only valid on the stack pointer. */
3513 if (GET_CODE (op) == PRE_DEC
3514 || GET_CODE (op) == POST_INC)
3515 return 1;
3517 /* Decode the address. */
3518 if (! ix86_decompose_address (op, &parts))
3519 abort ();
3521 if (parts.base && GET_CODE (parts.base) == SUBREG)
3522 parts.base = SUBREG_REG (parts.base);
3523 if (parts.index && GET_CODE (parts.index) == SUBREG)
3524 parts.index = SUBREG_REG (parts.index);
3526 /* Look for some component that isn't known to be aligned. */
3527 if (parts.index)
3529 if (parts.scale < 4
3530 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3531 return 0;
3533 if (parts.base)
3535 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3536 return 0;
3538 if (parts.disp)
3540 if (GET_CODE (parts.disp) != CONST_INT
3541 || (INTVAL (parts.disp) & 3) != 0)
3542 return 0;
3545 /* Didn't find one -- this must be an aligned address. */
3546 return 1;
3549 /* Return true if the constant is something that can be loaded with
3550 a special instruction. Only handle 0.0 and 1.0; others are less
3551 worthwhile. */
3554 standard_80387_constant_p (x)
3555 rtx x;
3557 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3558 return -1;
3559 /* Note that the 80387 supports other constants, such as pi, that we should
3560 support too. On some machines, these are much slower to load as a standard
3561 constant than to load from doubles in memory. */
3562 if (x == CONST0_RTX (GET_MODE (x)))
3563 return 1;
3564 if (x == CONST1_RTX (GET_MODE (x)))
3565 return 2;
3566 return 0;
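/* For illustration: a return value of 1 corresponds to 0.0, which the output
   patterns load with fldz, and 2 corresponds to 1.0, loaded with fld1.  */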
3569 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3572 standard_sse_constant_p (x)
3573 rtx x;
3575 if (GET_CODE (x) != CONST_DOUBLE)
3576 return -1;
3577 return (x == CONST0_RTX (GET_MODE (x)));
3580 /* Returns 1 if OP contains a symbol reference */
3583 symbolic_reference_mentioned_p (op)
3584 rtx op;
3586 register const char *fmt;
3587 register int i;
3589 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3590 return 1;
3592 fmt = GET_RTX_FORMAT (GET_CODE (op));
3593 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3595 if (fmt[i] == 'E')
3597 register int j;
3599 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3600 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3601 return 1;
3604 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3605 return 1;
3608 return 0;
3611 /* Return 1 if it is appropriate to emit `ret' instructions in the
3612 body of a function. Do this only if the epilogue is simple, needing a
3613 couple of insns. Prior to reloading, we can't tell how many registers
3614 must be saved, so return 0 then. Return 0 if there is no frame
3615 marker to de-allocate.
3617 If NON_SAVING_SETJMP is defined and true, then it is not possible
3618 for the epilogue to be simple, so return 0. This is a special case
3619 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3620 until final, but jump_optimize may need to know sooner if a
3621 `return' is OK. */
3624 ix86_can_use_return_insn_p ()
3626 struct ix86_frame frame;
3628 #ifdef NON_SAVING_SETJMP
3629 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3630 return 0;
3631 #endif
3633 if (! reload_completed || frame_pointer_needed)
3634 return 0;
3636 /* Don't allow more than 32768 bytes of pop, since that's all we can do
3637 with one instruction. */
3638 if (current_function_pops_args
3639 && current_function_args_size >= 32768)
3640 return 0;
3642 ix86_compute_frame_layout (&frame);
3643 return frame.to_allocate == 0 && frame.nregs == 0;
3646 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3648 x86_64_sign_extended_value (value)
3649 rtx value;
3651 switch (GET_CODE (value))
3653 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3654 to be at least 32, and thus all acceptable constants are
3655 represented as CONST_INT. */
3656 case CONST_INT:
3657 if (HOST_BITS_PER_WIDE_INT == 32)
3658 return 1;
3659 else
3661 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3662 return trunc_int_for_mode (val, SImode) == val;
3664 break;
3666 /* For certain code models, the symbolic references are known to fit. */
3667 case SYMBOL_REF:
3668 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3670 /* For certain code models, the code is near as well. */
3671 case LABEL_REF:
3672 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3674 /* We also may accept the offsetted memory references in certain special
3675 cases. */
3676 case CONST:
3677 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3678 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3679 return 1;
3680 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3682 rtx op1 = XEXP (XEXP (value, 0), 0);
3683 rtx op2 = XEXP (XEXP (value, 0), 1);
3684 HOST_WIDE_INT offset;
3686 if (ix86_cmodel == CM_LARGE)
3687 return 0;
3688 if (GET_CODE (op2) != CONST_INT)
3689 return 0;
3690 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3691 switch (GET_CODE (op1))
3693 case SYMBOL_REF:
3694 /* For CM_SMALL assume that latest object is 1MB before
3695 end of 31bits boundary. We may also accept pretty
3696 large negative constants knowing that all objects are
3697 in the positive half of address space. */
3698 if (ix86_cmodel == CM_SMALL
3699 && offset < 1024*1024*1024
3700 && trunc_int_for_mode (offset, SImode) == offset)
3701 return 1;
3702 /* For CM_KERNEL we know that all objects reside in the
3703 negative half of the 32-bit address space. We may not
3704 accept negative offsets, since they may be just off
3705 and we may accept pretty large positive ones. */
3706 if (ix86_cmodel == CM_KERNEL
3707 && offset > 0
3708 && trunc_int_for_mode (offset, SImode) == offset)
3709 return 1;
3710 break;
3711 case LABEL_REF:
3712 /* These conditions are similar to SYMBOL_REF ones, just the
3713 constraints for code models differ. */
3714 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3715 && offset < 1024*1024*1024
3716 && trunc_int_for_mode (offset, SImode) == offset)
3717 return 1;
3718 if (ix86_cmodel == CM_KERNEL
3719 && offset > 0
3720 && trunc_int_for_mode (offset, SImode) == offset)
3721 return 1;
3722 break;
3723 default:
3724 return 0;
3727 return 0;
3728 default:
3729 return 0;
3733 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3735 x86_64_zero_extended_value (value)
3736 rtx value;
3738 switch (GET_CODE (value))
3740 case CONST_DOUBLE:
3741 if (HOST_BITS_PER_WIDE_INT == 32)
3742 return (GET_MODE (value) == VOIDmode
3743 && !CONST_DOUBLE_HIGH (value));
3744 else
3745 return 0;
3746 case CONST_INT:
3747 if (HOST_BITS_PER_WIDE_INT == 32)
3748 return INTVAL (value) >= 0;
3749 else
3750 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3751 break;
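      /* Illustrative examples (hypothetical values): on a host with a 64-bit
	 HOST_WIDE_INT, 0xffffffff is accepted since no bits above bit 31 are
	 set, while -1 and 0x100000000 are rejected -- exactly the range of a
	 zero-extended imm32.  */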
3753 /* For certain code models, the symbolic references are known to fit. */
3754 case SYMBOL_REF:
3755 return ix86_cmodel == CM_SMALL;
3757 /* For certain code models, the code is near as well. */
3758 case LABEL_REF:
3759 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3761 /* We may also accept offsetted memory references in certain special
3762 cases. */
3763 case CONST:
3764 if (GET_CODE (XEXP (value, 0)) == PLUS)
3766 rtx op1 = XEXP (XEXP (value, 0), 0);
3767 rtx op2 = XEXP (XEXP (value, 0), 1);
3769 if (ix86_cmodel == CM_LARGE)
3770 return 0;
3771 switch (GET_CODE (op1))
3773 case SYMBOL_REF:
3774 return 0;
3775 /* For small code model we may accept pretty large positive
3776 offsets, since one bit is available for free. Negative
3777 offsets are limited by the size of NULL pointer area
3778 specified by the ABI. */
3779 if (ix86_cmodel == CM_SMALL
3780 && GET_CODE (op2) == CONST_INT
3781 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3782 && (trunc_int_for_mode (INTVAL (op2), SImode)
3783 == INTVAL (op2)))
3784 return 1;
3785 /* ??? For the kernel, we may accept adjustment of
3786 -0x10000000, since we know that it will just convert
3787 negative address space to positive, but perhaps this
3788 is not worthwhile. */
3789 break;
3790 case LABEL_REF:
3791 /* These conditions are similar to SYMBOL_REF ones, just the
3792 constraints for code models differ. */
3793 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3794 && GET_CODE (op2) == CONST_INT
3795 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3796 && (trunc_int_for_mode (INTVAL (op2), SImode)
3797 == INTVAL (op2)))
3798 return 1;
3799 break;
3800 default:
3801 return 0;
3804 return 0;
3805 default:
3806 return 0;
3810 /* Value should be nonzero if functions must have frame pointers.
3811 Zero means the frame pointer need not be set up (and parms may
3812 be accessed via the stack pointer) in functions that seem suitable. */
3815 ix86_frame_pointer_required ()
3817 /* If we accessed previous frames, then the generated code expects
3818 to be able to access the saved ebp value in our frame. */
3819 if (cfun->machine->accesses_prev_frame)
3820 return 1;
3822 /* Several x86 OSes need a frame pointer for other reasons,
3823 usually pertaining to setjmp. */
3824 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3825 return 1;
3827 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3828 the frame pointer by default. Turn it back on now if we've not
3829 got a leaf function. */
3830 if (TARGET_OMIT_LEAF_FRAME_POINTER
3831 && (!current_function_is_leaf || current_function_profile))
3832 return 1;
3834 return 0;
3837 /* Record that the current function accesses previous call frames. */
3839 void
3840 ix86_setup_frame_addresses ()
3842 cfun->machine->accesses_prev_frame = 1;
3845 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3846 # define USE_HIDDEN_LINKONCE 1
3847 #else
3848 # define USE_HIDDEN_LINKONCE 0
3849 #endif
3851 static int pic_labels_used;
3853 /* Fills in the label name that should be used for a pc thunk for
3854 the given register. */
3856 static void
3857 get_pc_thunk_name (name, regno)
3858 char name[32];
3859 unsigned int regno;
3861 if (USE_HIDDEN_LINKONCE)
3862 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3863 else
3864 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3868 /* This function generates code for -fpic that loads a register with
3869 the return address of the caller and then returns. */
3871 void
3872 ix86_asm_file_end (file)
3873 FILE *file;
3875 rtx xops[2];
3876 int regno;
3878 for (regno = 0; regno < 8; ++regno)
3880 char name[32];
3882 if (! ((pic_labels_used >> regno) & 1))
3883 continue;
3885 get_pc_thunk_name (name, regno);
3887 if (USE_HIDDEN_LINKONCE)
3889 tree decl;
3891 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3892 error_mark_node);
3893 TREE_PUBLIC (decl) = 1;
3894 TREE_STATIC (decl) = 1;
3895 DECL_ONE_ONLY (decl) = 1;
3897 (*targetm.asm_out.unique_section) (decl, 0);
3898 named_section (decl, NULL, 0);
3900 ASM_GLOBALIZE_LABEL (file, name);
3901 fputs ("\t.hidden\t", file);
3902 assemble_name (file, name);
3903 fputc ('\n', file);
3904 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3906 else
3908 text_section ();
3909 ASM_OUTPUT_LABEL (file, name);
3912 xops[0] = gen_rtx_REG (SImode, regno);
3913 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3914 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3915 output_asm_insn ("ret", xops);
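      /* For illustration, assuming the hidden linkonce path and %ebx as the
	 thunk register, the thunk emitted above is roughly:

	     __i686.get_pc_thunk.bx:
		     movl    (%esp), %ebx
		     ret

	 i.e. it copies the caller's return address (the address of the
	 instruction following the call) into the register and returns.  */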
3919 /* Emit code for the SET_GOT patterns. */
3921 const char *
3922 output_set_got (dest)
3923 rtx dest;
3925 rtx xops[3];
3927 xops[0] = dest;
3928 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3930 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3932 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3934 if (!flag_pic)
3935 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3936 else
3937 output_asm_insn ("call\t%a2", xops);
3939 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3940 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3942 if (flag_pic)
3943 output_asm_insn ("pop{l}\t%0", xops);
3945 else
3947 char name[32];
3948 get_pc_thunk_name (name, REGNO (dest));
3949 pic_labels_used |= 1 << REGNO (dest);
3951 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3952 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3953 output_asm_insn ("call\t%X2", xops);
3956 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3957 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3958 else
3959 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
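  /* For illustration (a sketch, not verbatim compiler output): with -fpic and
     without deep branch prediction, the sequence emitted above for %ebx is
     roughly

	     call    .L2
     .L2:    popl    %ebx
	     addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     while the thunk variant instead calls __i686.get_pc_thunk.bx and then
     adds $_GLOBAL_OFFSET_TABLE_.  */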
3961 return "";
3964 /* Generate an "push" pattern for input ARG. */
3966 static rtx
3967 gen_push (arg)
3968 rtx arg;
3970 return gen_rtx_SET (VOIDmode,
3971 gen_rtx_MEM (Pmode,
3972 gen_rtx_PRE_DEC (Pmode,
3973 stack_pointer_rtx)),
3974 arg);
3977 /* Return >= 0 if there is an unused call-clobbered register available
3978 for the entire function. */
3980 static unsigned int
3981 ix86_select_alt_pic_regnum ()
3983 if (current_function_is_leaf && !current_function_profile)
3985 int i;
3986 for (i = 2; i >= 0; --i)
3987 if (!regs_ever_live[i])
3988 return i;
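  /* The loop above scans hard registers 2, 1 and 0, which in this port's
     register numbering are %ecx, %edx and %eax -- the call-clobbered
     integer registers that can stand in for the PIC register.  */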
3991 return INVALID_REGNUM;
3994 /* Return 1 if we need to save REGNO. */
3995 static int
3996 ix86_save_reg (regno, maybe_eh_return)
3997 unsigned int regno;
3998 int maybe_eh_return;
4000 if (pic_offset_table_rtx
4001 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4002 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4003 || current_function_profile
4004 || current_function_calls_eh_return))
4006 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4007 return 0;
4008 return 1;
4011 if (current_function_calls_eh_return && maybe_eh_return)
4013 unsigned i;
4014 for (i = 0; ; i++)
4016 unsigned test = EH_RETURN_DATA_REGNO (i);
4017 if (test == INVALID_REGNUM)
4018 break;
4019 if (test == regno)
4020 return 1;
4024 return (regs_ever_live[regno]
4025 && !call_used_regs[regno]
4026 && !fixed_regs[regno]
4027 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4030 /* Return number of registers to be saved on the stack. */
4032 static int
4033 ix86_nsaved_regs ()
4035 int nregs = 0;
4036 int regno;
4038 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4039 if (ix86_save_reg (regno, true))
4040 nregs++;
4041 return nregs;
4044 /* Return the offset between two registers, one to be eliminated, and the other
4045 its replacement, at the start of a routine. */
4047 HOST_WIDE_INT
4048 ix86_initial_elimination_offset (from, to)
4049 int from;
4050 int to;
4052 struct ix86_frame frame;
4053 ix86_compute_frame_layout (&frame);
4055 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4056 return frame.hard_frame_pointer_offset;
4057 else if (from == FRAME_POINTER_REGNUM
4058 && to == HARD_FRAME_POINTER_REGNUM)
4059 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4060 else
4062 if (to != STACK_POINTER_REGNUM)
4063 abort ();
4064 else if (from == ARG_POINTER_REGNUM)
4065 return frame.stack_pointer_offset;
4066 else if (from != FRAME_POINTER_REGNUM)
4067 abort ();
4068 else
4069 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4073 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
4075 static void
4076 ix86_compute_frame_layout (frame)
4077 struct ix86_frame *frame;
4079 HOST_WIDE_INT total_size;
4080 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4081 int offset;
4082 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4083 HOST_WIDE_INT size = get_frame_size ();
4085 frame->nregs = ix86_nsaved_regs ();
4086 total_size = size;
4088 /* Skip return address and saved base pointer. */
4089 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4091 frame->hard_frame_pointer_offset = offset;
4093 /* Do some sanity checking of stack_alignment_needed and
4094 preferred_alignment, since the i386 port is the only one using these
4095 features in ways that may break easily. */
4097 if (size && !stack_alignment_needed)
4098 abort ();
4099 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4100 abort ();
4101 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4102 abort ();
4103 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4104 abort ();
4106 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4107 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4109 /* Register save area */
4110 offset += frame->nregs * UNITS_PER_WORD;
4112 /* Va-arg area */
4113 if (ix86_save_varrargs_registers)
4115 offset += X86_64_VARARGS_SIZE;
4116 frame->va_arg_size = X86_64_VARARGS_SIZE;
4118 else
4119 frame->va_arg_size = 0;
4121 /* Align start of frame for local function. */
4122 frame->padding1 = ((offset + stack_alignment_needed - 1)
4123 & -stack_alignment_needed) - offset;
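  /* The expression above rounds OFFSET up to the next multiple of
     STACK_ALIGNMENT_NEEDED; e.g. with a (hypothetical) offset of 20 and a
     required alignment of 16, (20 + 15) & -16 gives 32, so padding1 is 12.  */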
4125 offset += frame->padding1;
4127 /* Frame pointer points here. */
4128 frame->frame_pointer_offset = offset;
4130 offset += size;
4132 /* Add outgoing arguments area. Can be skipped if we eliminated
4133 all the function calls as dead code. */
4134 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4136 offset += current_function_outgoing_args_size;
4137 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4139 else
4140 frame->outgoing_arguments_size = 0;
4142 /* Align stack boundary. Only needed if we're calling another function
4143 or using alloca. */
4144 if (!current_function_is_leaf || current_function_calls_alloca)
4145 frame->padding2 = ((offset + preferred_alignment - 1)
4146 & -preferred_alignment) - offset;
4147 else
4148 frame->padding2 = 0;
4150 offset += frame->padding2;
4152 /* We've reached end of stack frame. */
4153 frame->stack_pointer_offset = offset;
4155 /* The size the prologue needs to allocate. */
4156 frame->to_allocate =
4157 (size + frame->padding1 + frame->padding2
4158 + frame->outgoing_arguments_size + frame->va_arg_size);
4160 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4161 && current_function_is_leaf)
4163 frame->red_zone_size = frame->to_allocate;
4164 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4165 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4167 else
4168 frame->red_zone_size = 0;
4169 frame->to_allocate -= frame->red_zone_size;
4170 frame->stack_pointer_offset -= frame->red_zone_size;
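  /* A worked example with hypothetical numbers: for a 32-bit leaf function
     that still needs a frame pointer and saves three call-saved registers,
     OFFSET starts at 8 (return address plus saved %ebp) and reaches 20 after
     the register save area; with 16-byte stack alignment, padding1 is 12 and
     frame_pointer_offset is 32.  With 40 bytes of locals, to_allocate is
     40 + 12 = 52 plus any outgoing-argument area and its padding -- the
     pushed registers themselves are not part of it.  */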
4171 #if 0
4172 fprintf (stderr, "nregs: %i\n", frame->nregs);
4173 fprintf (stderr, "size: %i\n", size);
4174 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4175 fprintf (stderr, "padding1: %i\n", frame->padding1);
4176 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4177 fprintf (stderr, "padding2: %i\n", frame->padding2);
4178 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4179 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4180 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4181 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4182 frame->hard_frame_pointer_offset);
4183 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4184 #endif
4187 /* Emit code to save registers in the prologue. */
4189 static void
4190 ix86_emit_save_regs ()
4192 register int regno;
4193 rtx insn;
4195 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4196 if (ix86_save_reg (regno, true))
4198 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4199 RTX_FRAME_RELATED_P (insn) = 1;
4203 /* Emit code to save registers using MOV insns. The first register
4204 is saved at POINTER + OFFSET. */
4205 static void
4206 ix86_emit_save_regs_using_mov (pointer, offset)
4207 rtx pointer;
4208 HOST_WIDE_INT offset;
4210 int regno;
4211 rtx insn;
4213 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4214 if (ix86_save_reg (regno, true))
4216 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4217 Pmode, offset),
4218 gen_rtx_REG (Pmode, regno));
4219 RTX_FRAME_RELATED_P (insn) = 1;
4220 offset += UNITS_PER_WORD;
4224 /* Expand the prologue into a bunch of separate insns. */
4226 void
4227 ix86_expand_prologue ()
4229 rtx insn;
4230 bool pic_reg_used;
4231 struct ix86_frame frame;
4232 int use_mov = 0;
4233 HOST_WIDE_INT allocate;
4235 if (!optimize_size)
4237 use_fast_prologue_epilogue
4238 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4239 if (TARGET_PROLOGUE_USING_MOVE)
4240 use_mov = use_fast_prologue_epilogue;
4242 ix86_compute_frame_layout (&frame);
4244 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4245 slower on all targets. Also sdb doesn't like it. */
4247 if (frame_pointer_needed)
4249 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4250 RTX_FRAME_RELATED_P (insn) = 1;
4252 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4253 RTX_FRAME_RELATED_P (insn) = 1;
4256 allocate = frame.to_allocate;
4257 /* When we are dealing with only a single register and an empty frame,
4258 a push is equivalent to the mov+add sequence. */
4259 if (allocate == 0 && frame.nregs <= 1)
4260 use_mov = 0;
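  /* E.g. (a hypothetical single-register case): saving just %ebx with an
     otherwise empty frame is one "pushl %ebx" on the push path, versus
     "subl $4, %esp" plus "movl %ebx, (%esp)" on the move path, which is why
     the move path is skipped here.  */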
4262 if (!use_mov)
4263 ix86_emit_save_regs ();
4264 else
4265 allocate += frame.nregs * UNITS_PER_WORD;
4267 if (allocate == 0)
4269 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4271 insn = emit_insn (gen_pro_epilogue_adjust_stack
4272 (stack_pointer_rtx, stack_pointer_rtx,
4273 GEN_INT (-allocate)));
4274 RTX_FRAME_RELATED_P (insn) = 1;
4276 else
4278 /* ??? Is this only valid for Win32? */
4280 rtx arg0, sym;
4282 if (TARGET_64BIT)
4283 abort ();
4285 arg0 = gen_rtx_REG (SImode, 0);
4286 emit_move_insn (arg0, GEN_INT (allocate));
4288 sym = gen_rtx_MEM (FUNCTION_MODE,
4289 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4290 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4292 CALL_INSN_FUNCTION_USAGE (insn)
4293 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4294 CALL_INSN_FUNCTION_USAGE (insn));
4296 if (use_mov)
4298 if (!frame_pointer_needed || !frame.to_allocate)
4299 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4300 else
4301 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4302 -frame.nregs * UNITS_PER_WORD);
4305 #ifdef SUBTARGET_PROLOGUE
4306 SUBTARGET_PROLOGUE;
4307 #endif
4309 pic_reg_used = false;
4310 if (pic_offset_table_rtx
4311 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4312 || current_function_profile))
4314 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4316 if (alt_pic_reg_used != INVALID_REGNUM)
4317 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4319 pic_reg_used = true;
4322 if (pic_reg_used)
4324 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4326 /* Even with accurate pre-reload life analysis, we can wind up
4327 deleting all references to the pic register after reload.
4328 Consider the case where cross-jumping unifies two sides of a branch
4329 controlled by a comparison against the only read from a global.
4330 In that case, allow the set_got to be deleted, though we're
4331 too late to do anything about the ebx save in the prologue. */
4332 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4335 /* Prevent function calls from being scheduled before the call to mcount.
4336 In the pic_reg_used case, make sure that the got load isn't deleted. */
4337 if (current_function_profile)
4338 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4341 /* Emit code to restore saved registers using MOV insns. First register
4342 is restored from POINTER + OFFSET. */
4343 static void
4344 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4345 rtx pointer;
4346 int offset;
4347 int maybe_eh_return;
4349 int regno;
4351 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4352 if (ix86_save_reg (regno, maybe_eh_return))
4354 emit_move_insn (gen_rtx_REG (Pmode, regno),
4355 adjust_address (gen_rtx_MEM (Pmode, pointer),
4356 Pmode, offset));
4357 offset += UNITS_PER_WORD;
4361 /* Restore function stack, frame, and registers. */
4363 void
4364 ix86_expand_epilogue (style)
4365 int style;
4367 int regno;
4368 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4369 struct ix86_frame frame;
4370 HOST_WIDE_INT offset;
4372 ix86_compute_frame_layout (&frame);
4374 /* Calculate start of saved registers relative to ebp. Special care
4375 must be taken for the normal return case of a function using
4376 eh_return: the eax and edx registers are marked as saved, but not
4377 restored along this path. */
4378 offset = frame.nregs;
4379 if (current_function_calls_eh_return && style != 2)
4380 offset -= 2;
4381 offset *= -UNITS_PER_WORD;
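  /* For illustration (hypothetical numbers, assuming a 32-bit UNITS_PER_WORD
     of 4): with three saved registers on the normal-return path of a function
     that uses eh_return, offset becomes -(3 - 2) * 4 = -4, since the eax/edx
     slots are not restored on this path.  */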
4383 /* If we're only restoring one register and sp is not valid, then
4384 use a move instruction to restore the register, since it's
4385 less work than reloading sp and popping the register.
4387 The default code results in a stack adjustment using an add/lea instruction,
4388 while this code results in a LEAVE instruction (or discrete equivalent),
4389 so it is profitable in some other cases as well, especially when there
4390 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4391 and there is exactly one register to pop. This heuristic may need some
4392 tuning in the future. */
4393 if ((!sp_valid && frame.nregs <= 1)
4394 || (TARGET_EPILOGUE_USING_MOVE
4395 && use_fast_prologue_epilogue
4396 && (frame.nregs > 1 || frame.to_allocate))
4397 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4398 || (frame_pointer_needed && TARGET_USE_LEAVE
4399 && use_fast_prologue_epilogue && frame.nregs == 1)
4400 || current_function_calls_eh_return)
4402 /* Restore registers. We can use ebp or esp to address the memory
4403 locations. If both are available, default to ebp, since offsets
4404 are known to be small. The only exception is esp pointing directly to the
4405 end of the block of saved registers, where we may simplify the addressing
4406 mode. */
4408 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4409 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4410 frame.to_allocate, style == 2);
4411 else
4412 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4413 offset, style == 2);
4415 /* eh_return epilogues need %ecx added to the stack pointer. */
4416 if (style == 2)
4418 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4420 if (frame_pointer_needed)
4422 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4423 tmp = plus_constant (tmp, UNITS_PER_WORD);
4424 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4426 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4427 emit_move_insn (hard_frame_pointer_rtx, tmp);
4429 emit_insn (gen_pro_epilogue_adjust_stack
4430 (stack_pointer_rtx, sa, const0_rtx));
4432 else
4434 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4435 tmp = plus_constant (tmp, (frame.to_allocate
4436 + frame.nregs * UNITS_PER_WORD));
4437 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4440 else if (!frame_pointer_needed)
4441 emit_insn (gen_pro_epilogue_adjust_stack
4442 (stack_pointer_rtx, stack_pointer_rtx,
4443 GEN_INT (frame.to_allocate
4444 + frame.nregs * UNITS_PER_WORD)));
4445 /* If not an i386, mov & pop is faster than "leave". */
4446 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4447 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4448 else
4450 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4451 hard_frame_pointer_rtx,
4452 const0_rtx));
4453 if (TARGET_64BIT)
4454 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4455 else
4456 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4459 else
4461 /* First step is to deallocate the stack frame so that we can
4462 pop the registers. */
4463 if (!sp_valid)
4465 if (!frame_pointer_needed)
4466 abort ();
4467 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4468 hard_frame_pointer_rtx,
4469 GEN_INT (offset)));
4471 else if (frame.to_allocate)
4472 emit_insn (gen_pro_epilogue_adjust_stack
4473 (stack_pointer_rtx, stack_pointer_rtx,
4474 GEN_INT (frame.to_allocate)));
4476 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4477 if (ix86_save_reg (regno, false))
4479 if (TARGET_64BIT)
4480 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4481 else
4482 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4484 if (frame_pointer_needed)
4486 /* Leave results in shorter dependency chains on CPUs that are
4487 able to grok it fast. */
4488 if (TARGET_USE_LEAVE)
4489 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4490 else if (TARGET_64BIT)
4491 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4492 else
4493 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4497 /* Sibcall epilogues don't want a return instruction. */
4498 if (style == 0)
4499 return;
4501 if (current_function_pops_args && current_function_args_size)
4503 rtx popc = GEN_INT (current_function_pops_args);
4505 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
4506 return address, do an explicit add, and jump indirectly to the
4507 caller. */
4509 if (current_function_pops_args >= 65536)
4511 rtx ecx = gen_rtx_REG (SImode, 2);
4513 /* There is no "pascal" calling convention in the 64-bit ABI. */
4514 if (TARGET_64BIT)
4515 abort ();
4517 emit_insn (gen_popsi1 (ecx));
4518 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4519 emit_jump_insn (gen_return_indirect_internal (ecx));
4521 else
4522 emit_jump_insn (gen_return_pop_internal (popc));
4524 else
4525 emit_jump_insn (gen_return_internal ());
4528 /* Reset from the function's potential modifications. */
4530 static void
4531 ix86_output_function_epilogue (file, size)
4532 FILE *file ATTRIBUTE_UNUSED;
4533 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4535 if (pic_offset_table_rtx)
4536 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4539 /* Extract the parts of an RTL expression that is a valid memory address
4540 for an instruction. Return 0 if the structure of the address is
4541 grossly off. Return -1 if the address contains ASHIFT, so it is not
4542 strictly valid, but is still used for computing the length of a lea instruction.
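   For illustration, a (hypothetical) address such as
	(plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx)) (const_int 12))
   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12,
   i.e. the operand 12(%ebx,%esi,4).  */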
4545 static int
4546 ix86_decompose_address (addr, out)
4547 register rtx addr;
4548 struct ix86_address *out;
4550 rtx base = NULL_RTX;
4551 rtx index = NULL_RTX;
4552 rtx disp = NULL_RTX;
4553 HOST_WIDE_INT scale = 1;
4554 rtx scale_rtx = NULL_RTX;
4555 int retval = 1;
4557 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4558 base = addr;
4559 else if (GET_CODE (addr) == PLUS)
4561 rtx op0 = XEXP (addr, 0);
4562 rtx op1 = XEXP (addr, 1);
4563 enum rtx_code code0 = GET_CODE (op0);
4564 enum rtx_code code1 = GET_CODE (op1);
4566 if (code0 == REG || code0 == SUBREG)
4568 if (code1 == REG || code1 == SUBREG)
4569 index = op0, base = op1; /* index + base */
4570 else
4571 base = op0, disp = op1; /* base + displacement */
4573 else if (code0 == MULT)
4575 index = XEXP (op0, 0);
4576 scale_rtx = XEXP (op0, 1);
4577 if (code1 == REG || code1 == SUBREG)
4578 base = op1; /* index*scale + base */
4579 else
4580 disp = op1; /* index*scale + disp */
4582 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4584 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4585 scale_rtx = XEXP (XEXP (op0, 0), 1);
4586 base = XEXP (op0, 1);
4587 disp = op1;
4589 else if (code0 == PLUS)
4591 index = XEXP (op0, 0); /* index + base + disp */
4592 base = XEXP (op0, 1);
4593 disp = op1;
4595 else
4596 return 0;
4598 else if (GET_CODE (addr) == MULT)
4600 index = XEXP (addr, 0); /* index*scale */
4601 scale_rtx = XEXP (addr, 1);
4603 else if (GET_CODE (addr) == ASHIFT)
4605 rtx tmp;
4607 /* We're called for lea too, which implements ashift on occasion. */
4608 index = XEXP (addr, 0);
4609 tmp = XEXP (addr, 1);
4610 if (GET_CODE (tmp) != CONST_INT)
4611 return 0;
4612 scale = INTVAL (tmp);
4613 if ((unsigned HOST_WIDE_INT) scale > 3)
4614 return 0;
4615 scale = 1 << scale;
4616 retval = -1;
4618 else
4619 disp = addr; /* displacement */
4621 /* Extract the integral value of scale. */
4622 if (scale_rtx)
4624 if (GET_CODE (scale_rtx) != CONST_INT)
4625 return 0;
4626 scale = INTVAL (scale_rtx);
4629 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4630 if (base && index && scale == 1
4631 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4632 || index == stack_pointer_rtx))
4634 rtx tmp = base;
4635 base = index;
4636 index = tmp;
4639 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4640 if ((base == hard_frame_pointer_rtx
4641 || base == frame_pointer_rtx
4642 || base == arg_pointer_rtx) && !disp)
4643 disp = const0_rtx;
4645 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4646 Avoid this by transforming to [%esi+0]. */
4647 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4648 && base && !index && !disp
4649 && REG_P (base)
4650 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4651 disp = const0_rtx;
4653 /* Special case: encode reg+reg instead of reg*2. */
4654 if (!base && index && scale && scale == 2)
4655 base = index, scale = 1;
4657 /* Special case: scaling cannot be encoded without base or displacement. */
4658 if (!base && !disp && index && scale != 1)
4659 disp = const0_rtx;
4661 out->base = base;
4662 out->index = index;
4663 out->disp = disp;
4664 out->scale = scale;
4666 return retval;
4669 /* Return cost of the memory address x.
4670 For i386, it is better to use a complex address than let gcc copy
4671 the address into a reg and make a new pseudo. But not if the address
4672 requires two regs - that would mean more pseudos with longer
4673 lifetimes. */
4675 ix86_address_cost (x)
4676 rtx x;
4678 struct ix86_address parts;
4679 int cost = 1;
4681 if (!ix86_decompose_address (x, &parts))
4682 abort ();
4684 if (parts.base && GET_CODE (parts.base) == SUBREG)
4685 parts.base = SUBREG_REG (parts.base);
4686 if (parts.index && GET_CODE (parts.index) == SUBREG)
4687 parts.index = SUBREG_REG (parts.index);
4689 /* More complex memory references are better. */
4690 if (parts.disp && parts.disp != const0_rtx)
4691 cost--;
4693 /* Attempt to minimize number of registers in the address. */
4694 if ((parts.base
4695 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4696 || (parts.index
4697 && (!REG_P (parts.index)
4698 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4699 cost++;
4701 if (parts.base
4702 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4703 && parts.index
4704 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4705 && parts.base != parts.index)
4706 cost++;
4708 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4709 since its predecode logic can't detect the length of such instructions
4710 and it degenerates to vector decoding. Increase the cost of such
4711 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4712 to split such addresses or even to refuse them entirely.
4714 The following addressing modes are affected:
4715 [base+scale*index]
4716 [scale*index+disp]
4717 [base+index]
4719 The first and last cases may be avoidable by explicitly coding the zero into
4720 the memory address, but I don't have an AMD-K6 machine handy to check this
4721 theory. */
4723 if (TARGET_K6
4724 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4725 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4726 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4727 cost += 10;
4729 return cost;
4732 /* If X is a machine specific address (i.e. a symbol or label being
4733 referenced as a displacement from the GOT implemented using an
4734 UNSPEC), then return the base term. Otherwise return X. */
4737 ix86_find_base_term (x)
4738 rtx x;
4740 rtx term;
4742 if (TARGET_64BIT)
4744 if (GET_CODE (x) != CONST)
4745 return x;
4746 term = XEXP (x, 0);
4747 if (GET_CODE (term) == PLUS
4748 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4749 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4750 term = XEXP (term, 0);
4751 if (GET_CODE (term) != UNSPEC
4752 || XINT (term, 1) != UNSPEC_GOTPCREL)
4753 return x;
4755 term = XVECEXP (term, 0, 0);
4757 if (GET_CODE (term) != SYMBOL_REF
4758 && GET_CODE (term) != LABEL_REF)
4759 return x;
4761 return term;
4764 if (GET_CODE (x) != PLUS
4765 || XEXP (x, 0) != pic_offset_table_rtx
4766 || GET_CODE (XEXP (x, 1)) != CONST)
4767 return x;
4769 term = XEXP (XEXP (x, 1), 0);
4771 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4772 term = XEXP (term, 0);
4774 if (GET_CODE (term) != UNSPEC
4775 || XINT (term, 1) != UNSPEC_GOTOFF)
4776 return x;
4778 term = XVECEXP (term, 0, 0);
4780 if (GET_CODE (term) != SYMBOL_REF
4781 && GET_CODE (term) != LABEL_REF)
4782 return x;
4784 return term;
4787 /* Determine if a given RTX is a valid constant. We already know this
4788 satisfies CONSTANT_P. */
4790 bool
4791 legitimate_constant_p (x)
4792 rtx x;
4794 rtx inner;
4796 switch (GET_CODE (x))
4798 case SYMBOL_REF:
4799 /* TLS symbols are not constant. */
4800 if (tls_symbolic_operand (x, Pmode))
4801 return false;
4802 break;
4804 case CONST:
4805 inner = XEXP (x, 0);
4807 /* Offsets of TLS symbols are never valid.
4808 Discourage CSE from creating them. */
4809 if (GET_CODE (inner) == PLUS
4810 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4811 return false;
4813 /* Only some unspecs are valid as "constants". */
4814 if (GET_CODE (inner) == UNSPEC)
4815 switch (XINT (inner, 1))
4817 case UNSPEC_TPOFF:
4818 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4819 case UNSPEC_TP:
4820 return true;
4821 default:
4822 return false;
4824 break;
4826 default:
4827 break;
4830 /* Otherwise we handle everything else in the move patterns. */
4831 return true;
4834 /* Determine if a given RTX is a valid constant address. */
4836 bool
4837 constant_address_p (x)
4838 rtx x;
4840 switch (GET_CODE (x))
4842 case LABEL_REF:
4843 case CONST_INT:
4844 return true;
4846 case CONST_DOUBLE:
4847 return TARGET_64BIT;
4849 case CONST:
4850 case SYMBOL_REF:
4851 return !flag_pic && legitimate_constant_p (x);
4853 default:
4854 return false;
4858 /* Nonzero if the constant value X is a legitimate general operand
4859 when generating PIC code. It is given that flag_pic is on and
4860 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4862 bool
4863 legitimate_pic_operand_p (x)
4864 rtx x;
4866 rtx inner;
4868 switch (GET_CODE (x))
4870 case CONST:
4871 inner = XEXP (x, 0);
4873 /* Only some unspecs are valid as "constants". */
4874 if (GET_CODE (inner) == UNSPEC)
4875 switch (XINT (inner, 1))
4877 case UNSPEC_TPOFF:
4878 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4879 case UNSPEC_TP:
4880 return true;
4881 default:
4882 return false;
4884 /* FALLTHRU */
4886 case SYMBOL_REF:
4887 case LABEL_REF:
4888 return legitimate_pic_address_disp_p (x);
4890 default:
4891 return true;
4895 /* Determine if a given CONST RTX is a valid memory displacement
4896 in PIC mode. */
4899 legitimate_pic_address_disp_p (disp)
4900 register rtx disp;
4902 bool saw_plus;
4904 /* In 64bit mode we can allow direct addresses of symbols and labels
4905 when they are not dynamic symbols. */
4906 if (TARGET_64BIT)
4908 rtx x = disp;
4909 if (GET_CODE (disp) == CONST)
4910 x = XEXP (disp, 0);
4911 /* ??? Handle PIC code models */
4912 if (GET_CODE (x) == PLUS
4913 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4914 && ix86_cmodel == CM_SMALL_PIC
4915 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4916 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4917 x = XEXP (x, 0);
4918 if (local_symbolic_operand (x, Pmode))
4919 return 1;
4921 if (GET_CODE (disp) != CONST)
4922 return 0;
4923 disp = XEXP (disp, 0);
4925 if (TARGET_64BIT)
4927 /* It is unsafe to allow PLUS expressions here; this limits the allowed
4928 distance of GOT table references, but we should not need these anyway. */
4929 if (GET_CODE (disp) != UNSPEC
4930 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4931 return 0;
4933 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4934 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4935 return 0;
4936 return 1;
4939 saw_plus = false;
4940 if (GET_CODE (disp) == PLUS)
4942 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4943 return 0;
4944 disp = XEXP (disp, 0);
4945 saw_plus = true;
4948 if (GET_CODE (disp) != UNSPEC)
4949 return 0;
4951 switch (XINT (disp, 1))
4953 case UNSPEC_GOT:
4954 if (saw_plus)
4955 return false;
4956 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4957 case UNSPEC_GOTOFF:
4958 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4959 case UNSPEC_GOTTPOFF:
4960 if (saw_plus)
4961 return false;
4962 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4963 case UNSPEC_NTPOFF:
4964 /* ??? Could support offset here. */
4965 if (saw_plus)
4966 return false;
4967 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4968 case UNSPEC_DTPOFF:
4969 /* ??? Could support offset here. */
4970 if (saw_plus)
4971 return false;
4972 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4975 return 0;
4978 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4979 memory address for an instruction. The MODE argument is the machine mode
4980 for the MEM expression that wants to use this address.
4982 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4983 convert common non-canonical forms to canonical form so that they will
4984 be recognized. */
4987 legitimate_address_p (mode, addr, strict)
4988 enum machine_mode mode;
4989 register rtx addr;
4990 int strict;
4992 struct ix86_address parts;
4993 rtx base, index, disp;
4994 HOST_WIDE_INT scale;
4995 const char *reason = NULL;
4996 rtx reason_rtx = NULL_RTX;
4998 if (TARGET_DEBUG_ADDR)
5000 fprintf (stderr,
5001 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5002 GET_MODE_NAME (mode), strict);
5003 debug_rtx (addr);
5006 if (ix86_decompose_address (addr, &parts) <= 0)
5008 reason = "decomposition failed";
5009 goto report_error;
5012 base = parts.base;
5013 index = parts.index;
5014 disp = parts.disp;
5015 scale = parts.scale;
5017 /* Validate base register.
5019 Don't allow SUBREGs here; they can lead to spill failures when the base
5020 is one word out of a two word structure, which is represented internally
5021 as a DImode int. */
5023 if (base)
5025 rtx reg;
5026 reason_rtx = base;
5028 if (GET_CODE (base) == SUBREG)
5029 reg = SUBREG_REG (base);
5030 else
5031 reg = base;
5033 if (GET_CODE (reg) != REG)
5035 reason = "base is not a register";
5036 goto report_error;
5039 if (GET_MODE (base) != Pmode)
5041 reason = "base is not in Pmode";
5042 goto report_error;
5045 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5046 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5048 reason = "base is not valid";
5049 goto report_error;
5053 /* Validate index register.
5055 Don't allow SUBREGs here; they can lead to spill failures when the index
5056 is one word out of a two word structure, which is represented internally
5057 as a DImode int. */
5059 if (index)
5061 rtx reg;
5062 reason_rtx = index;
5064 if (GET_CODE (index) == SUBREG)
5065 reg = SUBREG_REG (index);
5066 else
5067 reg = index;
5069 if (GET_CODE (reg) != REG)
5071 reason = "index is not a register";
5072 goto report_error;
5075 if (GET_MODE (index) != Pmode)
5077 reason = "index is not in Pmode";
5078 goto report_error;
5081 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5082 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5084 reason = "index is not valid";
5085 goto report_error;
5089 /* Validate scale factor. */
5090 if (scale != 1)
5092 reason_rtx = GEN_INT (scale);
5093 if (!index)
5095 reason = "scale without index";
5096 goto report_error;
5099 if (scale != 2 && scale != 4 && scale != 8)
5101 reason = "scale is not a valid multiplier";
5102 goto report_error;
5106 /* Validate displacement. */
5107 if (disp)
5109 reason_rtx = disp;
5111 if (TARGET_64BIT)
5113 if (!x86_64_sign_extended_value (disp))
5115 reason = "displacement is out of range";
5116 goto report_error;
5119 else
5121 if (GET_CODE (disp) == CONST_DOUBLE)
5123 reason = "displacement is a const_double";
5124 goto report_error;
5128 if (GET_CODE (disp) == CONST
5129 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5130 switch (XINT (XEXP (disp, 0), 1))
5132 case UNSPEC_GOT:
5133 case UNSPEC_GOTOFF:
5134 case UNSPEC_GOTPCREL:
5135 if (!flag_pic)
5136 abort ();
5137 goto is_legitimate_pic;
5139 case UNSPEC_GOTTPOFF:
5140 case UNSPEC_NTPOFF:
5141 case UNSPEC_DTPOFF:
5142 break;
5144 default:
5145 reason = "invalid address unspec";
5146 goto report_error;
5149 else if (flag_pic && SYMBOLIC_CONST (disp))
5151 is_legitimate_pic:
5152 if (TARGET_64BIT && (index || base))
5154 reason = "non-constant pic memory reference";
5155 goto report_error;
5157 if (! legitimate_pic_address_disp_p (disp))
5159 reason = "displacement is an invalid pic construct";
5160 goto report_error;
5163 /* This code used to verify that a symbolic pic displacement
5164 includes the pic_offset_table_rtx register.
5166 While this is a good idea, unfortunately these constructs may
5167 be created by the "adds using lea" optimization for incorrect
5168 code like:
5170 int a;
5171 int foo (int i)
5173 { return *(&a + i); }
5176 This code is nonsensical, but results in addressing the
5177 GOT table with a pic_offset_table_rtx base. We can't
5178 just refuse it easily, since it gets matched by the
5179 "addsi3" pattern, which later gets split to lea when the
5180 output register differs from the input. While this
5181 could be handled by a separate addsi pattern for this case
5182 that never results in lea, disabling this test seems to be
5183 the easier and correct fix for the crash. */
5185 else if (!CONSTANT_ADDRESS_P (disp))
5187 reason = "displacement is not constant";
5188 goto report_error;
5192 /* Everything looks valid. */
5193 if (TARGET_DEBUG_ADDR)
5194 fprintf (stderr, "Success.\n");
5195 return TRUE;
5197 report_error:
5198 if (TARGET_DEBUG_ADDR)
5200 fprintf (stderr, "Error: %s\n", reason);
5201 debug_rtx (reason_rtx);
5203 return FALSE;
5206 /* Return a unique alias set for the GOT. */
5208 static HOST_WIDE_INT
5209 ix86_GOT_alias_set ()
5211 static HOST_WIDE_INT set = -1;
5212 if (set == -1)
5213 set = new_alias_set ();
5214 return set;
5217 /* Return a legitimate reference for ORIG (an address) using the
5218 register REG. If REG is 0, a new pseudo is generated.
5220 There are two types of references that must be handled:
5222 1. Global data references must load the address from the GOT, via
5223 the PIC reg. An insn is emitted to do this load, and the reg is
5224 returned.
5226 2. Static data references, constant pool addresses, and code labels
5227 compute the address as an offset from the GOT, whose base is in
5228 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5229 differentiate them from global data objects. The returned
5230 address is the PIC reg + an unspec constant.
5232 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5233 reg also appears in the address. */
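/* For illustration (a sketch, not verbatim output): in 32-bit PIC code a
   global symbol foo is typically reached through "movl foo@GOT(%ebx), %reg"
   and then dereferenced, whereas a local/static symbol bar becomes the
   constant expression %ebx + bar@GOTOFF, which is usable directly as an
   address.  */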
5236 legitimize_pic_address (orig, reg)
5237 rtx orig;
5238 rtx reg;
5240 rtx addr = orig;
5241 rtx new = orig;
5242 rtx base;
5244 if (local_symbolic_operand (addr, Pmode))
5246 /* In 64bit mode we can address such objects directly. */
5247 if (TARGET_64BIT)
5248 new = addr;
5249 else
5251 /* This symbol may be referenced via a displacement from the PIC
5252 base address (@GOTOFF). */
5254 if (reload_in_progress)
5255 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5256 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5257 new = gen_rtx_CONST (Pmode, new);
5258 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5260 if (reg != 0)
5262 emit_move_insn (reg, new);
5263 new = reg;
5267 else if (GET_CODE (addr) == SYMBOL_REF)
5269 if (TARGET_64BIT)
5271 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5272 new = gen_rtx_CONST (Pmode, new);
5273 new = gen_rtx_MEM (Pmode, new);
5274 RTX_UNCHANGING_P (new) = 1;
5275 set_mem_alias_set (new, ix86_GOT_alias_set ());
5277 if (reg == 0)
5278 reg = gen_reg_rtx (Pmode);
5279 /* Use gen_movsi directly, otherwise the address is loaded
5280 into a register for CSE. We don't want to CSE these addresses;
5281 instead we CSE addresses from the GOT table, so skip this. */
5282 emit_insn (gen_movsi (reg, new));
5283 new = reg;
5285 else
5287 /* This symbol must be referenced via a load from the
5288 Global Offset Table (@GOT). */
5290 if (reload_in_progress)
5291 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5292 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5293 new = gen_rtx_CONST (Pmode, new);
5294 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5295 new = gen_rtx_MEM (Pmode, new);
5296 RTX_UNCHANGING_P (new) = 1;
5297 set_mem_alias_set (new, ix86_GOT_alias_set ());
5299 if (reg == 0)
5300 reg = gen_reg_rtx (Pmode);
5301 emit_move_insn (reg, new);
5302 new = reg;
5305 else
5307 if (GET_CODE (addr) == CONST)
5309 addr = XEXP (addr, 0);
5311 /* We must match stuff we generate before. Assume the only
5312 unspecs that can get here are ours. Not that we could do
5313 anything with them anyway... */
5314 if (GET_CODE (addr) == UNSPEC
5315 || (GET_CODE (addr) == PLUS
5316 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5317 return orig;
5318 if (GET_CODE (addr) != PLUS)
5319 abort ();
5321 if (GET_CODE (addr) == PLUS)
5323 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5325 /* Check first to see if this is a constant offset from a @GOTOFF
5326 symbol reference. */
5327 if (local_symbolic_operand (op0, Pmode)
5328 && GET_CODE (op1) == CONST_INT)
5330 if (!TARGET_64BIT)
5332 if (reload_in_progress)
5333 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5334 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5335 UNSPEC_GOTOFF);
5336 new = gen_rtx_PLUS (Pmode, new, op1);
5337 new = gen_rtx_CONST (Pmode, new);
5338 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5340 if (reg != 0)
5342 emit_move_insn (reg, new);
5343 new = reg;
5346 else
5348 /* ??? We need to limit offsets here. */
5351 else
5353 base = legitimize_pic_address (XEXP (addr, 0), reg);
5354 new = legitimize_pic_address (XEXP (addr, 1),
5355 base == reg ? NULL_RTX : reg);
5357 if (GET_CODE (new) == CONST_INT)
5358 new = plus_constant (base, INTVAL (new));
5359 else
5361 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5363 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5364 new = XEXP (new, 1);
5366 new = gen_rtx_PLUS (Pmode, base, new);
5371 return new;
5374 static void
5375 ix86_encode_section_info (decl, first)
5376 tree decl;
5377 int first ATTRIBUTE_UNUSED;
5379 bool local_p = (*targetm.binds_local_p) (decl);
5380 rtx rtl, symbol;
5382 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5383 if (GET_CODE (rtl) != MEM)
5384 return;
5385 symbol = XEXP (rtl, 0);
5386 if (GET_CODE (symbol) != SYMBOL_REF)
5387 return;
5389 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5390 symbol so that we may access it directly in the GOT. */
5392 if (flag_pic)
5393 SYMBOL_REF_FLAG (symbol) = local_p;
5395 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5396 "local dynamic", "initial exec" or "local exec" TLS models
5397 respectively. */
5399 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5401 const char *symbol_str;
5402 char *newstr;
5403 size_t len;
5404 enum tls_model kind;
5406 if (!flag_pic)
5408 if (local_p)
5409 kind = TLS_MODEL_LOCAL_EXEC;
5410 else
5411 kind = TLS_MODEL_INITIAL_EXEC;
5413 /* Local dynamic is inefficient when we're not combining the
5414 parts of the address. */
5415 else if (optimize && local_p)
5416 kind = TLS_MODEL_LOCAL_DYNAMIC;
5417 else
5418 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5419 if (kind < flag_tls_default)
5420 kind = flag_tls_default;
5422 symbol_str = XSTR (symbol, 0);
5424 if (symbol_str[0] == '%')
5426 if (symbol_str[1] == tls_model_chars[kind])
5427 return;
5428 symbol_str += 2;
5430 len = strlen (symbol_str) + 1;
5431 newstr = alloca (len + 2);
5433 newstr[0] = '%';
5434 newstr[1] = tls_model_chars[kind];
5435 memcpy (newstr + 2, symbol_str, len);
5437 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
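      /* For illustration (hypothetical symbol): a thread-local variable foo
	 compiled for the global dynamic model has its assembler name
	 rewritten to "%Gfoo"; ix86_strip_name_encoding below strips the
	 two-character prefix again before the name is printed.  */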
5441 /* Undo the above when printing symbol names. */
5443 static const char *
5444 ix86_strip_name_encoding (str)
5445 const char *str;
5447 if (str[0] == '%')
5448 str += 2;
5449 if (str [0] == '*')
5450 str += 1;
5451 return str;
5454 /* Load the thread pointer into a register. */
5456 static rtx
5457 get_thread_pointer ()
5459 rtx tp;
5461 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5462 tp = gen_rtx_CONST (Pmode, tp);
5463 tp = force_reg (Pmode, tp);
5465 return tp;
5468 /* Try machine-dependent ways of modifying an illegitimate address
5469 to be legitimate. If we find one, return the new, valid address.
5470 This macro is used in only one place: `memory_address' in explow.c.
5472 OLDX is the address as it was before break_out_memory_refs was called.
5473 In some cases it is useful to look at this to decide what needs to be done.
5475 MODE and WIN are passed so that this macro can use
5476 GO_IF_LEGITIMATE_ADDRESS.
5478 It is always safe for this macro to do nothing. It exists to recognize
5479 opportunities to optimize the output.
5481 For the 80386, we handle X+REG by loading X into a register R and
5482 using R+REG. R will go in a general reg and indexing will be used.
5483 However, if REG is a broken-out memory address or multiplication,
5484 nothing needs to be done because REG can certainly go in a general reg.
5486 When -fpic is used, special handling is needed for symbolic references.
5487 See comments by legitimize_pic_address in i386.c for details. */
5490 legitimize_address (x, oldx, mode)
5491 register rtx x;
5492 register rtx oldx ATTRIBUTE_UNUSED;
5493 enum machine_mode mode;
5495 int changed = 0;
5496 unsigned log;
5498 if (TARGET_DEBUG_ADDR)
5500 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5501 GET_MODE_NAME (mode));
5502 debug_rtx (x);
5505 log = tls_symbolic_operand (x, mode);
5506 if (log)
5508 rtx dest, base, off, pic;
5510 switch (log)
5512 case TLS_MODEL_GLOBAL_DYNAMIC:
5513 dest = gen_reg_rtx (Pmode);
5514 emit_insn (gen_tls_global_dynamic (dest, x));
5515 break;
5517 case TLS_MODEL_LOCAL_DYNAMIC:
5518 base = gen_reg_rtx (Pmode);
5519 emit_insn (gen_tls_local_dynamic_base (base));
5521 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5522 off = gen_rtx_CONST (Pmode, off);
5524 return gen_rtx_PLUS (Pmode, base, off);
5526 case TLS_MODEL_INITIAL_EXEC:
5527 if (flag_pic)
5529 if (reload_in_progress)
5530 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5531 pic = pic_offset_table_rtx;
5533 else
5535 pic = gen_reg_rtx (Pmode);
5536 emit_insn (gen_set_got (pic));
5539 base = get_thread_pointer ();
5541 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5542 off = gen_rtx_CONST (Pmode, off);
5543 off = gen_rtx_PLUS (Pmode, pic, off);
5544 off = gen_rtx_MEM (Pmode, off);
5545 RTX_UNCHANGING_P (off) = 1;
5546 set_mem_alias_set (off, ix86_GOT_alias_set ());
5548 /* Damn Sun for specifying a set of dynamic relocations without
5549 considering the two-operand nature of the architecture!
5550 We'd be much better off with a "GOTNTPOFF" relocation that
5551 already contained the negated constant. */
5552 /* ??? Using negl and reg+reg addressing appears to be a lose
5553 size-wise. The negl is two bytes, just like the extra movl
5554 incurred by the two-operand subl, but reg+reg addressing
5555 uses the two-byte modrm form, unlike plain reg. */
5557 dest = gen_reg_rtx (Pmode);
5558 emit_insn (gen_subsi3 (dest, base, off));
5559 break;
5561 case TLS_MODEL_LOCAL_EXEC:
5562 base = get_thread_pointer ();
5564 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5565 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5566 off = gen_rtx_CONST (Pmode, off);
5568 if (TARGET_GNU_TLS)
5569 return gen_rtx_PLUS (Pmode, base, off);
5570 else
5572 dest = gen_reg_rtx (Pmode);
5573 emit_insn (gen_subsi3 (dest, base, off));
5575 break;
5577 default:
5578 abort ();
5581 return dest;
5584 if (flag_pic && SYMBOLIC_CONST (x))
5585 return legitimize_pic_address (x, 0);
5587 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5588 if (GET_CODE (x) == ASHIFT
5589 && GET_CODE (XEXP (x, 1)) == CONST_INT
5590 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5592 changed = 1;
5593 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5594 GEN_INT (1 << log));
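      /* E.g. (hypothetically) (ashift (reg) (const_int 2)) becomes
	 (mult (reg) (const_int 4)), the canonical form for a scaled index
	 in a memory address.  */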
5597 if (GET_CODE (x) == PLUS)
5599 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5601 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5602 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5603 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5605 changed = 1;
5606 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5607 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5608 GEN_INT (1 << log));
5611 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5612 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5613 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5615 changed = 1;
5616 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5617 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5618 GEN_INT (1 << log));
5621 /* Put multiply first if it isn't already. */
5622 if (GET_CODE (XEXP (x, 1)) == MULT)
5624 rtx tmp = XEXP (x, 0);
5625 XEXP (x, 0) = XEXP (x, 1);
5626 XEXP (x, 1) = tmp;
5627 changed = 1;
5630 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5631 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5632 created by virtual register instantiation, register elimination, and
5633 similar optimizations. */
5634 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5636 changed = 1;
5637 x = gen_rtx_PLUS (Pmode,
5638 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5639 XEXP (XEXP (x, 1), 0)),
5640 XEXP (XEXP (x, 1), 1));
5643 /* Canonicalize
5644 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5645 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5646 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5647 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5648 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5649 && CONSTANT_P (XEXP (x, 1)))
5651 rtx constant;
5652 rtx other = NULL_RTX;
5654 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5656 constant = XEXP (x, 1);
5657 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5659 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5661 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5662 other = XEXP (x, 1);
5664 else
5665 constant = 0;
5667 if (constant)
5669 changed = 1;
5670 x = gen_rtx_PLUS (Pmode,
5671 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5672 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5673 plus_constant (other, INTVAL (constant)));
5677 if (changed && legitimate_address_p (mode, x, FALSE))
5678 return x;
5680 if (GET_CODE (XEXP (x, 0)) == MULT)
5682 changed = 1;
5683 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5686 if (GET_CODE (XEXP (x, 1)) == MULT)
5688 changed = 1;
5689 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5692 if (changed
5693 && GET_CODE (XEXP (x, 1)) == REG
5694 && GET_CODE (XEXP (x, 0)) == REG)
5695 return x;
5697 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5699 changed = 1;
5700 x = legitimize_pic_address (x, 0);
5703 if (changed && legitimate_address_p (mode, x, FALSE))
5704 return x;
5706 if (GET_CODE (XEXP (x, 0)) == REG)
5708 register rtx temp = gen_reg_rtx (Pmode);
5709 register rtx val = force_operand (XEXP (x, 1), temp);
5710 if (val != temp)
5711 emit_move_insn (temp, val);
5713 XEXP (x, 1) = temp;
5714 return x;
5717 else if (GET_CODE (XEXP (x, 1)) == REG)
5719 register rtx temp = gen_reg_rtx (Pmode);
5720 register rtx val = force_operand (XEXP (x, 0), temp);
5721 if (val != temp)
5722 emit_move_insn (temp, val);
5724 XEXP (x, 0) = temp;
5725 return x;
5729 return x;
5732 /* Print an integer constant expression in assembler syntax. Addition
5733 and subtraction are the only arithmetic that may appear in these
5734 expressions. FILE is the stdio stream to write to, X is the rtx, and
5735 CODE is the operand print code from the output string. */
5737 static void
5738 output_pic_addr_const (file, x, code)
5739 FILE *file;
5740 rtx x;
5741 int code;
5743 char buf[256];
5745 switch (GET_CODE (x))
5747 case PC:
5748 if (flag_pic)
5749 putc ('.', file);
5750 else
5751 abort ();
5752 break;
5754 case SYMBOL_REF:
5755 assemble_name (file, XSTR (x, 0));
5756 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5757 fputs ("@PLT", file);
5758 break;
5760 case LABEL_REF:
5761 x = XEXP (x, 0);
5762 /* FALLTHRU */
5763 case CODE_LABEL:
5764 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5765 assemble_name (asm_out_file, buf);
5766 break;
5768 case CONST_INT:
5769 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5770 break;
5772 case CONST:
5773 /* This used to output parentheses around the expression,
5774 but that does not work on the 386 (either ATT or BSD assembler). */
5775 output_pic_addr_const (file, XEXP (x, 0), code);
5776 break;
5778 case CONST_DOUBLE:
5779 if (GET_MODE (x) == VOIDmode)
5781 /* We can use %d if the number is <32 bits and positive. */
5782 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5783 fprintf (file, "0x%lx%08lx",
5784 (unsigned long) CONST_DOUBLE_HIGH (x),
5785 (unsigned long) CONST_DOUBLE_LOW (x));
5786 else
5787 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5789 else
5790 /* We can't handle floating point constants;
5791 PRINT_OPERAND must handle them. */
5792 output_operand_lossage ("floating constant misused");
5793 break;
5795 case PLUS:
5796 /* Some assemblers need integer constants to appear first. */
5797 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5799 output_pic_addr_const (file, XEXP (x, 0), code);
5800 putc ('+', file);
5801 output_pic_addr_const (file, XEXP (x, 1), code);
5803 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5805 output_pic_addr_const (file, XEXP (x, 1), code);
5806 putc ('+', file);
5807 output_pic_addr_const (file, XEXP (x, 0), code);
5809 else
5810 abort ();
5811 break;
5813 case MINUS:
5814 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5815 output_pic_addr_const (file, XEXP (x, 0), code);
5816 putc ('-', file);
5817 output_pic_addr_const (file, XEXP (x, 1), code);
5818 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5819 break;
5821 case UNSPEC:
5822 if (XVECLEN (x, 0) != 1)
5823 abort ();
5824 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5825 switch (XINT (x, 1))
5827 case UNSPEC_GOT:
5828 fputs ("@GOT", file);
5829 break;
5830 case UNSPEC_GOTOFF:
5831 fputs ("@GOTOFF", file);
5832 break;
5833 case UNSPEC_GOTPCREL:
5834 fputs ("@GOTPCREL(%rip)", file);
5835 break;
5836 case UNSPEC_GOTTPOFF:
5837 fputs ("@GOTTPOFF", file);
5838 break;
5839 case UNSPEC_TPOFF:
5840 fputs ("@TPOFF", file);
5841 break;
5842 case UNSPEC_NTPOFF:
5843 fputs ("@NTPOFF", file);
5844 break;
5845 case UNSPEC_DTPOFF:
5846 fputs ("@DTPOFF", file);
5847 break;
5848 default:
5849 output_operand_lossage ("invalid UNSPEC as operand");
5850 break;
5852 break;
5854 default:
5855 output_operand_lossage ("invalid expression as operand");
5859 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5860 We need to handle our special PIC relocations. */
5862 void
5863 i386_dwarf_output_addr_const (file, x)
5864 FILE *file;
5865 rtx x;
5867 #ifdef ASM_QUAD
5868 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5869 #else
5870 if (TARGET_64BIT)
5871 abort ();
5872 fprintf (file, "%s", ASM_LONG);
5873 #endif
5874 if (flag_pic)
5875 output_pic_addr_const (file, x, '\0');
5876 else
5877 output_addr_const (file, x);
5878 fputc ('\n', file);
5881 /* In the name of slightly smaller debug output, and to cater to
5882 general assembler lossage, recognize PIC+GOTOFF and turn it back
5883 into a direct symbol reference. */
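/* Illustrative sketch (not in the original source): an address such as
   (plus:SI (reg:SI %ebx) (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF)))
   is turned back into the bare (symbol_ref:SI "foo") for the debug output.  */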
5885 rtx
5886 i386_simplify_dwarf_addr (orig_x)
5887 rtx orig_x;
5889 rtx x = orig_x, y;
5891 if (GET_CODE (x) == MEM)
5892 x = XEXP (x, 0);
5894 if (TARGET_64BIT)
5896 if (GET_CODE (x) != CONST
5897 || GET_CODE (XEXP (x, 0)) != UNSPEC
5898 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5899 || GET_CODE (orig_x) != MEM)
5900 return orig_x;
5901 return XVECEXP (XEXP (x, 0), 0, 0);
5904 if (GET_CODE (x) != PLUS
5905 || GET_CODE (XEXP (x, 1)) != CONST)
5906 return orig_x;
5908 if (GET_CODE (XEXP (x, 0)) == REG
5909 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5910 /* %ebx + GOT/GOTOFF */
5911 y = NULL;
5912 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5914 /* %ebx + %reg * scale + GOT/GOTOFF */
5915 y = XEXP (x, 0);
5916 if (GET_CODE (XEXP (y, 0)) == REG
5917 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5918 y = XEXP (y, 1);
5919 else if (GET_CODE (XEXP (y, 1)) == REG
5920 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5921 y = XEXP (y, 0);
5922 else
5923 return orig_x;
5924 if (GET_CODE (y) != REG
5925 && GET_CODE (y) != MULT
5926 && GET_CODE (y) != ASHIFT)
5927 return orig_x;
5929 else
5930 return orig_x;
5932 x = XEXP (XEXP (x, 1), 0);
5933 if (GET_CODE (x) == UNSPEC
5934 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5935 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5937 if (y)
5938 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5939 return XVECEXP (x, 0, 0);
5942 if (GET_CODE (x) == PLUS
5943 && GET_CODE (XEXP (x, 0)) == UNSPEC
5944 && GET_CODE (XEXP (x, 1)) == CONST_INT
5945 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5946 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5947 && GET_CODE (orig_x) != MEM)))
5949 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5950 if (y)
5951 return gen_rtx_PLUS (Pmode, y, x);
5952 return x;
5955 return orig_x;
5958 static void
5959 put_condition_code (code, mode, reverse, fp, file)
5960 enum rtx_code code;
5961 enum machine_mode mode;
5962 int reverse, fp;
5963 FILE *file;
5965 const char *suffix;
5967 if (mode == CCFPmode || mode == CCFPUmode)
5969 enum rtx_code second_code, bypass_code;
5970 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5971 if (bypass_code != NIL || second_code != NIL)
5972 abort ();
5973 code = ix86_fp_compare_code_to_integer (code);
5974 mode = CCmode;
5976 if (reverse)
5977 code = reverse_condition (code);
5979 switch (code)
5981 case EQ:
5982 suffix = "e";
5983 break;
5984 case NE:
5985 suffix = "ne";
5986 break;
5987 case GT:
5988 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5989 abort ();
5990 suffix = "g";
5991 break;
5992 case GTU:
5993 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5994 Those same assemblers have the same but opposite lossage on cmov. */
5995 if (mode != CCmode)
5996 abort ();
5997 suffix = fp ? "nbe" : "a";
5998 break;
5999 case LT:
6000 if (mode == CCNOmode || mode == CCGOCmode)
6001 suffix = "s";
6002 else if (mode == CCmode || mode == CCGCmode)
6003 suffix = "l";
6004 else
6005 abort ();
6006 break;
6007 case LTU:
6008 if (mode != CCmode)
6009 abort ();
6010 suffix = "b";
6011 break;
6012 case GE:
6013 if (mode == CCNOmode || mode == CCGOCmode)
6014 suffix = "ns";
6015 else if (mode == CCmode || mode == CCGCmode)
6016 suffix = "ge";
6017 else
6018 abort ();
6019 break;
6020 case GEU:
6021 /* ??? As above. */
6022 if (mode != CCmode)
6023 abort ();
6024 suffix = fp ? "nb" : "ae";
6025 break;
6026 case LE:
6027 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6028 abort ();
6029 suffix = "le";
6030 break;
6031 case LEU:
6032 if (mode != CCmode)
6033 abort ();
6034 suffix = "be";
6035 break;
6036 case UNORDERED:
6037 suffix = fp ? "u" : "p";
6038 break;
6039 case ORDERED:
6040 suffix = fp ? "nu" : "np";
6041 break;
6042 default:
6043 abort ();
6045 fputs (suffix, file);
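/* Worked example (illustrative): for (gt (reg:CCGC 17) (const_int 0)) this
   emits "g", so an output template along the lines of "j%C1\t%l0" prints
   "jg" followed by the label; with REVERSE set the same comparison yields
   "le", i.e. "jle".  */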
6048 void
6049 print_reg (x, code, file)
6050 rtx x;
6051 int code;
6052 FILE *file;
6054 if (REGNO (x) == ARG_POINTER_REGNUM
6055 || REGNO (x) == FRAME_POINTER_REGNUM
6056 || REGNO (x) == FLAGS_REG
6057 || REGNO (x) == FPSR_REG)
6058 abort ();
6060 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6061 putc ('%', file);
6063 if (code == 'w' || MMX_REG_P (x))
6064 code = 2;
6065 else if (code == 'b')
6066 code = 1;
6067 else if (code == 'k')
6068 code = 4;
6069 else if (code == 'q')
6070 code = 8;
6071 else if (code == 'y')
6072 code = 3;
6073 else if (code == 'h')
6074 code = 0;
6075 else
6076 code = GET_MODE_SIZE (GET_MODE (x));
6078 /* Irritatingly, AMD extended registers use a different naming convention
6079 from the normal registers. */
6080 if (REX_INT_REG_P (x))
6082 if (!TARGET_64BIT)
6083 abort ();
6084 switch (code)
6086 case 0:
6087 error ("extended registers have no high halves");
6088 break;
6089 case 1:
6090 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6091 break;
6092 case 2:
6093 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6094 break;
6095 case 4:
6096 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6097 break;
6098 case 8:
6099 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6100 break;
6101 default:
6102 error ("unsupported operand size for extended register");
6103 break;
6105 return;
6107 switch (code)
6109 case 3:
6110 if (STACK_TOP_P (x))
6112 fputs ("st(0)", file);
6113 break;
6115 /* FALLTHRU */
6116 case 8:
6117 case 4:
6118 case 12:
6119 if (! ANY_FP_REG_P (x))
6120 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6121 /* FALLTHRU */
6122 case 16:
6123 case 2:
6124 fputs (hi_reg_name[REGNO (x)], file);
6125 break;
6126 case 1:
6127 fputs (qi_reg_name[REGNO (x)], file);
6128 break;
6129 case 0:
6130 fputs (qi_high_reg_name[REGNO (x)], file);
6131 break;
6132 default:
6133 abort ();
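/* Illustrative results: for hard register 0 (the a-register) the code above
   prints "%eax" with code 4, "%ax" with code 2, "%al" with code 1 and "%ah"
   with code 0; the REX registers come out as "%r8", "%r8d", "%r8w", "%r8b".  */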
6137 /* Locate some local-dynamic symbol still in use by this function
6138 so that we can print its name in some tls_local_dynamic_base
6139 pattern. */
6141 static const char *
6142 get_some_local_dynamic_name ()
6144 rtx insn;
6146 if (cfun->machine->some_ld_name)
6147 return cfun->machine->some_ld_name;
6149 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6150 if (INSN_P (insn)
6151 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6152 return cfun->machine->some_ld_name;
6154 abort ();
6157 static int
6158 get_some_local_dynamic_name_1 (px, data)
6159 rtx *px;
6160 void *data ATTRIBUTE_UNUSED;
6162 rtx x = *px;
6164 if (GET_CODE (x) == SYMBOL_REF
6165 && local_dynamic_symbolic_operand (x, Pmode))
6167 cfun->machine->some_ld_name = XSTR (x, 0);
6168 return 1;
6171 return 0;
6174 /* Meaning of CODE:
6175 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6176 C -- print opcode suffix for set/cmov insn.
6177 c -- like C, but print reversed condition
6178 F,f -- likewise, but for floating-point.
6179 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6180 nothing
6181 R -- print the prefix for register names.
6182 z -- print the opcode suffix for the size of the current operand.
6183 * -- print a star (in certain assembler syntax)
6184 A -- print an absolute memory reference.
6185 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6186 s -- print a shift double count, followed by the assembler's argument
6187 delimiter.
6188 b -- print the QImode name of the register for the indicated operand.
6189 %b0 would print %al if operands[0] is reg 0.
6190 w -- likewise, print the HImode name of the register.
6191 k -- likewise, print the SImode name of the register.
6192 q -- likewise, print the DImode name of the register.
6193 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6194 y -- print "st(0)" instead of "st" as a register.
6195 D -- print condition for SSE cmp instruction.
6196 P -- if PIC, print an @PLT suffix.
6197 X -- don't print any sort of PIC '@' suffix for a symbol.
6198 & -- print some in-use local-dynamic symbol name.
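/* A couple of worked examples (illustrative, not from the source): if
   operands[0] is the DImode a-register, "%k0" prints "%eax" and "%b0"
   prints "%al"; "%z1" applied to an SFmode memory operand adds the "s"
   size suffix, so a template such as "fld%z1" comes out as "flds".  */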
6201 void
6202 print_operand (file, x, code)
6203 FILE *file;
6204 rtx x;
6205 int code;
6207 if (code)
6209 switch (code)
6211 case '*':
6212 if (ASSEMBLER_DIALECT == ASM_ATT)
6213 putc ('*', file);
6214 return;
6216 case '&':
6217 assemble_name (file, get_some_local_dynamic_name ());
6218 return;
6220 case 'A':
6221 if (ASSEMBLER_DIALECT == ASM_ATT)
6222 putc ('*', file);
6223 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6225 /* Intel syntax. For absolute addresses, registers should not
6226 be surrounded by brackets. */
6227 if (GET_CODE (x) != REG)
6229 putc ('[', file);
6230 PRINT_OPERAND (file, x, 0);
6231 putc (']', file);
6232 return;
6235 else
6236 abort ();
6238 PRINT_OPERAND (file, x, 0);
6239 return;
6242 case 'L':
6243 if (ASSEMBLER_DIALECT == ASM_ATT)
6244 putc ('l', file);
6245 return;
6247 case 'W':
6248 if (ASSEMBLER_DIALECT == ASM_ATT)
6249 putc ('w', file);
6250 return;
6252 case 'B':
6253 if (ASSEMBLER_DIALECT == ASM_ATT)
6254 putc ('b', file);
6255 return;
6257 case 'Q':
6258 if (ASSEMBLER_DIALECT == ASM_ATT)
6259 putc ('l', file);
6260 return;
6262 case 'S':
6263 if (ASSEMBLER_DIALECT == ASM_ATT)
6264 putc ('s', file);
6265 return;
6267 case 'T':
6268 if (ASSEMBLER_DIALECT == ASM_ATT)
6269 putc ('t', file);
6270 return;
6272 case 'z':
6273 /* 387 opcodes don't get size suffixes if the operands are
6274 registers. */
6275 if (STACK_REG_P (x))
6276 return;
6278 /* Likewise if using Intel opcodes. */
6279 if (ASSEMBLER_DIALECT == ASM_INTEL)
6280 return;
6282 /* This is the size of op from size of operand. */
6283 switch (GET_MODE_SIZE (GET_MODE (x)))
6285 case 2:
6286 #ifdef HAVE_GAS_FILDS_FISTS
6287 putc ('s', file);
6288 #endif
6289 return;
6291 case 4:
6292 if (GET_MODE (x) == SFmode)
6294 putc ('s', file);
6295 return;
6297 else
6298 putc ('l', file);
6299 return;
6301 case 12:
6302 case 16:
6303 putc ('t', file);
6304 return;
6306 case 8:
6307 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6309 #ifdef GAS_MNEMONICS
6310 putc ('q', file);
6311 #else
6312 putc ('l', file);
6313 putc ('l', file);
6314 #endif
6316 else
6317 putc ('l', file);
6318 return;
6320 default:
6321 abort ();
6324 case 'b':
6325 case 'w':
6326 case 'k':
6327 case 'q':
6328 case 'h':
6329 case 'y':
6330 case 'X':
6331 case 'P':
6332 break;
6334 case 's':
6335 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6337 PRINT_OPERAND (file, x, 0);
6338 putc (',', file);
6340 return;
6342 case 'D':
6343 /* Little bit of braindamage here. The SSE compare instructions
6344 use completely different names for the comparisons than the
6345 fp conditional moves do. */
6346 switch (GET_CODE (x))
6348 case EQ:
6349 case UNEQ:
6350 fputs ("eq", file);
6351 break;
6352 case LT:
6353 case UNLT:
6354 fputs ("lt", file);
6355 break;
6356 case LE:
6357 case UNLE:
6358 fputs ("le", file);
6359 break;
6360 case UNORDERED:
6361 fputs ("unord", file);
6362 break;
6363 case NE:
6364 case LTGT:
6365 fputs ("neq", file);
6366 break;
6367 case UNGE:
6368 case GE:
6369 fputs ("nlt", file);
6370 break;
6371 case UNGT:
6372 case GT:
6373 fputs ("nle", file);
6374 break;
6375 case ORDERED:
6376 fputs ("ord", file);
6377 break;
6378 default:
6379 abort ();
6380 break;
6382 return;
6383 case 'O':
6384 #ifdef CMOV_SUN_AS_SYNTAX
6385 if (ASSEMBLER_DIALECT == ASM_ATT)
6387 switch (GET_MODE (x))
6389 case HImode: putc ('w', file); break;
6390 case SImode:
6391 case SFmode: putc ('l', file); break;
6392 case DImode:
6393 case DFmode: putc ('q', file); break;
6394 default: abort ();
6396 putc ('.', file);
6398 #endif
6399 return;
6400 case 'C':
6401 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6402 return;
6403 case 'F':
6404 #ifdef CMOV_SUN_AS_SYNTAX
6405 if (ASSEMBLER_DIALECT == ASM_ATT)
6406 putc ('.', file);
6407 #endif
6408 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6409 return;
6411 /* Like above, but reverse condition */
6412 case 'c':
6413 /* Check to see if argument to %c is really a constant
6414 and not a condition code which needs to be reversed. */
6415 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6417 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6418 return;
6420 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6421 return;
6422 case 'f':
6423 #ifdef CMOV_SUN_AS_SYNTAX
6424 if (ASSEMBLER_DIALECT == ASM_ATT)
6425 putc ('.', file);
6426 #endif
6427 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6428 return;
6429 case '+':
6431 rtx x;
6433 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6434 return;
6436 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6437 if (x)
6439 int pred_val = INTVAL (XEXP (x, 0));
6441 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6442 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6444 int taken = pred_val > REG_BR_PROB_BASE / 2;
6445 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6447 /* Emit hints only in the case the default branch prediction
6448 heuristics would fail. */
6449 if (taken != cputaken)
6451 /* We use 3e (DS) prefix for taken branches and
6452 2e (CS) prefix for not taken branches. */
6453 if (taken)
6454 fputs ("ds ; ", file);
6455 else
6456 fputs ("cs ; ", file);
6460 return;
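/* Background note: the 0x2e (CS) and 0x3e (DS) segment-override bytes are
   interpreted by the Pentium 4 as static "not taken" / "taken" branch
   prediction hints, which is why they are emitted as bare prefixes here.  */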
6462 default:
6463 output_operand_lossage ("invalid operand code `%c'", code);
6467 if (GET_CODE (x) == REG)
6469 PRINT_REG (x, code, file);
6472 else if (GET_CODE (x) == MEM)
6474 /* No `byte ptr' prefix for call instructions. */
6475 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6477 const char * size;
6478 switch (GET_MODE_SIZE (GET_MODE (x)))
6480 case 1: size = "BYTE"; break;
6481 case 2: size = "WORD"; break;
6482 case 4: size = "DWORD"; break;
6483 case 8: size = "QWORD"; break;
6484 case 12: size = "XWORD"; break;
6485 case 16: size = "XMMWORD"; break;
6486 default:
6487 abort ();
6490 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6491 if (code == 'b')
6492 size = "BYTE";
6493 else if (code == 'w')
6494 size = "WORD";
6495 else if (code == 'k')
6496 size = "DWORD";
6498 fputs (size, file);
6499 fputs (" PTR ", file);
6502 x = XEXP (x, 0);
6503 if (flag_pic && CONSTANT_ADDRESS_P (x))
6504 output_pic_addr_const (file, x, code);
6505 /* Avoid (%rip) for call operands. */
6506 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6507 && GET_CODE (x) != CONST_INT)
6508 output_addr_const (file, x);
6509 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6510 output_operand_lossage ("invalid constraints for operand");
6511 else
6512 output_address (x);
6515 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6517 REAL_VALUE_TYPE r;
6518 long l;
6520 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6521 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6523 if (ASSEMBLER_DIALECT == ASM_ATT)
6524 putc ('$', file);
6525 fprintf (file, "0x%lx", l);
6528 /* These float cases don't actually occur as immediate operands. */
6529 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6531 REAL_VALUE_TYPE r;
6532 char dstr[30];
6534 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6535 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6536 fprintf (file, "%s", dstr);
6539 else if (GET_CODE (x) == CONST_DOUBLE
6540 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6542 REAL_VALUE_TYPE r;
6543 char dstr[30];
6545 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6546 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6547 fprintf (file, "%s", dstr);
6550 else if (GET_CODE (x) == CONST
6551 && GET_CODE (XEXP (x, 0)) == UNSPEC
6552 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6554 if (ASSEMBLER_DIALECT == ASM_INTEL)
6555 fputs ("DWORD PTR ", file);
6556 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6557 putc ('%', file);
6558 fputs ("gs:0", file);
6561 else
6563 if (code != 'P')
6565 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6567 if (ASSEMBLER_DIALECT == ASM_ATT)
6568 putc ('$', file);
6570 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6571 || GET_CODE (x) == LABEL_REF)
6573 if (ASSEMBLER_DIALECT == ASM_ATT)
6574 putc ('$', file);
6575 else
6576 fputs ("OFFSET FLAT:", file);
6579 if (GET_CODE (x) == CONST_INT)
6580 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6581 else if (flag_pic)
6582 output_pic_addr_const (file, x, code);
6583 else
6584 output_addr_const (file, x);
6588 /* Print a memory operand whose address is ADDR. */
6590 void
6591 print_operand_address (file, addr)
6592 FILE *file;
6593 register rtx addr;
6595 struct ix86_address parts;
6596 rtx base, index, disp;
6597 int scale;
6599 if (! ix86_decompose_address (addr, &parts))
6600 abort ();
6602 base = parts.base;
6603 index = parts.index;
6604 disp = parts.disp;
6605 scale = parts.scale;
6607 if (!base && !index)
6609 /* A displacement-only address requires special attention. */
6611 if (GET_CODE (disp) == CONST_INT)
6613 if (ASSEMBLER_DIALECT == ASM_INTEL)
6615 if (USER_LABEL_PREFIX[0] == 0)
6616 putc ('%', file);
6617 fputs ("ds:", file);
6619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6621 else if (flag_pic)
6622 output_pic_addr_const (file, addr, 0);
6623 else
6624 output_addr_const (file, addr);
6626 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6627 if (TARGET_64BIT
6628 && (GET_CODE (addr) == SYMBOL_REF
6629 || GET_CODE (addr) == LABEL_REF
6630 || (GET_CODE (addr) == CONST
6631 && GET_CODE (XEXP (addr, 0)) == PLUS
6632 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6633 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6634 fputs ("(%rip)", file);
6636 else
6638 if (ASSEMBLER_DIALECT == ASM_ATT)
6640 if (disp)
6642 if (flag_pic)
6643 output_pic_addr_const (file, disp, 0);
6644 else if (GET_CODE (disp) == LABEL_REF)
6645 output_asm_label (disp);
6646 else
6647 output_addr_const (file, disp);
6650 putc ('(', file);
6651 if (base)
6652 PRINT_REG (base, 0, file);
6653 if (index)
6655 putc (',', file);
6656 PRINT_REG (index, 0, file);
6657 if (scale != 1)
6658 fprintf (file, ",%d", scale);
6660 putc (')', file);
6662 else
6664 rtx offset = NULL_RTX;
6666 if (disp)
6668 /* Pull out the offset of a symbol; print any symbol itself. */
6669 if (GET_CODE (disp) == CONST
6670 && GET_CODE (XEXP (disp, 0)) == PLUS
6671 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6673 offset = XEXP (XEXP (disp, 0), 1);
6674 disp = gen_rtx_CONST (VOIDmode,
6675 XEXP (XEXP (disp, 0), 0));
6678 if (flag_pic)
6679 output_pic_addr_const (file, disp, 0);
6680 else if (GET_CODE (disp) == LABEL_REF)
6681 output_asm_label (disp);
6682 else if (GET_CODE (disp) == CONST_INT)
6683 offset = disp;
6684 else
6685 output_addr_const (file, disp);
6688 putc ('[', file);
6689 if (base)
6691 PRINT_REG (base, 0, file);
6692 if (offset)
6694 if (INTVAL (offset) >= 0)
6695 putc ('+', file);
6696 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6699 else if (offset)
6700 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6701 else
6702 putc ('0', file);
6704 if (index)
6706 putc ('+', file);
6707 PRINT_REG (index, 0, file);
6708 if (scale != 1)
6709 fprintf (file, "*%d", scale);
6711 putc (']', file);
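/* Illustrative example: the address (plus (plus (reg %ebp) (mult (reg %eax)
   (const_int 4))) (const_int -8)) is printed as "-8(%ebp,%eax,4)" by the
   AT&T branch above and roughly as "[ebp-8+eax*4]" by the Intel branch
   (modulo the register-prefix convention).  */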
6716 bool
6717 output_addr_const_extra (file, x)
6718 FILE *file;
6719 rtx x;
6721 rtx op;
6723 if (GET_CODE (x) != UNSPEC)
6724 return false;
6726 op = XVECEXP (x, 0, 0);
6727 switch (XINT (x, 1))
6729 case UNSPEC_GOTTPOFF:
6730 output_addr_const (file, op);
6731 fputs ("@GOTTPOFF", file);
6732 break;
6733 case UNSPEC_TPOFF:
6734 output_addr_const (file, op);
6735 fputs ("@TPOFF", file);
6736 break;
6737 case UNSPEC_NTPOFF:
6738 output_addr_const (file, op);
6739 fputs ("@NTPOFF", file);
6740 break;
6741 case UNSPEC_DTPOFF:
6742 output_addr_const (file, op);
6743 fputs ("@DTPOFF", file);
6744 break;
6746 default:
6747 return false;
6750 return true;
6753 /* Split one or more DImode RTL references into pairs of SImode
6754 references. The RTL can be REG, offsettable MEM, integer constant, or
6755 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6756 split and "num" is its length. lo_half and hi_half are output arrays
6757 that parallel "operands". */
6759 void
6760 split_di (operands, num, lo_half, hi_half)
6761 rtx operands[];
6762 int num;
6763 rtx lo_half[], hi_half[];
6765 while (num--)
6767 rtx op = operands[num];
6769 /* simplify_subreg refuses to split volatile memory addresses,
6770 but we still have to handle them. */
6771 if (GET_CODE (op) == MEM)
6773 lo_half[num] = adjust_address (op, SImode, 0);
6774 hi_half[num] = adjust_address (op, SImode, 4);
6776 else
6778 lo_half[num] = simplify_gen_subreg (SImode, op,
6779 GET_MODE (op) == VOIDmode
6780 ? DImode : GET_MODE (op), 0);
6781 hi_half[num] = simplify_gen_subreg (SImode, op,
6782 GET_MODE (op) == VOIDmode
6783 ? DImode : GET_MODE (op), 4);
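/* Usage sketch (hypothetical pseudo number): splitting (reg:DI 58) gives
   lo_half = (subreg:SI (reg:DI 58) 0) and hi_half = (subreg:SI (reg:DI 58) 4),
   the low and high 32-bit words on this little-endian target.  */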
6787 /* Split one or more TImode RTL references into pairs of DImode
6788 references. The RTL can be REG, offsettable MEM, integer constant, or
6789 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6790 split and "num" is its length. lo_half and hi_half are output arrays
6791 that parallel "operands". */
6793 void
6794 split_ti (operands, num, lo_half, hi_half)
6795 rtx operands[];
6796 int num;
6797 rtx lo_half[], hi_half[];
6799 while (num--)
6801 rtx op = operands[num];
6803 /* simplify_subreg refuses to split volatile memory addresses, but we
6804 still have to handle them. */
6805 if (GET_CODE (op) == MEM)
6807 lo_half[num] = adjust_address (op, DImode, 0);
6808 hi_half[num] = adjust_address (op, DImode, 8);
6810 else
6812 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6813 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6818 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6819 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6820 is the expression of the binary operation. The output may either be
6821 emitted here, or returned to the caller, like all output_* functions.
6823 There is no guarantee that the operands are the same mode, as they
6824 might be within FLOAT or FLOAT_EXTEND expressions. */
6826 #ifndef SYSV386_COMPAT
6827 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6828 wants to fix the assemblers because that causes incompatibility
6829 with gcc. No-one wants to fix gcc because that causes
6830 incompatibility with assemblers... You can use the option of
6831 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6832 #define SYSV386_COMPAT 1
6833 #endif
6835 const char *
6836 output_387_binary_op (insn, operands)
6837 rtx insn;
6838 rtx *operands;
6840 static char buf[30];
6841 const char *p;
6842 const char *ssep;
6843 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6845 #ifdef ENABLE_CHECKING
6846 /* Even if we do not want to check the inputs, this documents input
6847 constraints. Which helps in understanding the following code. */
6848 if (STACK_REG_P (operands[0])
6849 && ((REG_P (operands[1])
6850 && REGNO (operands[0]) == REGNO (operands[1])
6851 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6852 || (REG_P (operands[2])
6853 && REGNO (operands[0]) == REGNO (operands[2])
6854 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6855 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6856 ; /* ok */
6857 else if (!is_sse)
6858 abort ();
6859 #endif
6861 switch (GET_CODE (operands[3]))
6863 case PLUS:
6864 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6865 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6866 p = "fiadd";
6867 else
6868 p = "fadd";
6869 ssep = "add";
6870 break;
6872 case MINUS:
6873 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6874 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6875 p = "fisub";
6876 else
6877 p = "fsub";
6878 ssep = "sub";
6879 break;
6881 case MULT:
6882 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6883 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6884 p = "fimul";
6885 else
6886 p = "fmul";
6887 ssep = "mul";
6888 break;
6890 case DIV:
6891 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6892 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6893 p = "fidiv";
6894 else
6895 p = "fdiv";
6896 ssep = "div";
6897 break;
6899 default:
6900 abort ();
6903 if (is_sse)
6905 strcpy (buf, ssep);
6906 if (GET_MODE (operands[0]) == SFmode)
6907 strcat (buf, "ss\t{%2, %0|%0, %2}");
6908 else
6909 strcat (buf, "sd\t{%2, %0|%0, %2}");
6910 return buf;
6912 strcpy (buf, p);
6914 switch (GET_CODE (operands[3]))
6916 case MULT:
6917 case PLUS:
6918 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6920 rtx temp = operands[2];
6921 operands[2] = operands[1];
6922 operands[1] = temp;
6925 /* We now know operands[0] == operands[1]. */
6927 if (GET_CODE (operands[2]) == MEM)
6929 p = "%z2\t%2";
6930 break;
6933 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6935 if (STACK_TOP_P (operands[0]))
6936 /* How is it that we are storing to a dead operand[2]?
6937 Well, presumably operands[1] is dead too. We can't
6938 store the result to st(0) as st(0) gets popped on this
6939 instruction. Instead store to operands[2] (which I
6940 think has to be st(1)). st(1) will be popped later.
6941 gcc <= 2.8.1 didn't have this check and generated
6942 assembly code that the Unixware assembler rejected. */
6943 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6944 else
6945 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6946 break;
6949 if (STACK_TOP_P (operands[0]))
6950 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6951 else
6952 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6953 break;
6955 case MINUS:
6956 case DIV:
6957 if (GET_CODE (operands[1]) == MEM)
6959 p = "r%z1\t%1";
6960 break;
6963 if (GET_CODE (operands[2]) == MEM)
6965 p = "%z2\t%2";
6966 break;
6969 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6971 #if SYSV386_COMPAT
6972 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6973 derived assemblers, confusingly reverse the direction of
6974 the operation for fsub{r} and fdiv{r} when the
6975 destination register is not st(0). The Intel assembler
6976 doesn't have this brain damage. Read !SYSV386_COMPAT to
6977 figure out what the hardware really does. */
6978 if (STACK_TOP_P (operands[0]))
6979 p = "{p\t%0, %2|rp\t%2, %0}";
6980 else
6981 p = "{rp\t%2, %0|p\t%0, %2}";
6982 #else
6983 if (STACK_TOP_P (operands[0]))
6984 /* As above for fmul/fadd, we can't store to st(0). */
6985 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6986 else
6987 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6988 #endif
6989 break;
6992 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6994 #if SYSV386_COMPAT
6995 if (STACK_TOP_P (operands[0]))
6996 p = "{rp\t%0, %1|p\t%1, %0}";
6997 else
6998 p = "{p\t%1, %0|rp\t%0, %1}";
6999 #else
7000 if (STACK_TOP_P (operands[0]))
7001 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7002 else
7003 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7004 #endif
7005 break;
7008 if (STACK_TOP_P (operands[0]))
7010 if (STACK_TOP_P (operands[1]))
7011 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7012 else
7013 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7014 break;
7016 else if (STACK_TOP_P (operands[1]))
7018 #if SYSV386_COMPAT
7019 p = "{\t%1, %0|r\t%0, %1}";
7020 #else
7021 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7022 #endif
7024 else
7026 #if SYSV386_COMPAT
7027 p = "{r\t%2, %0|\t%0, %2}";
7028 #else
7029 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7030 #endif
7032 break;
7034 default:
7035 abort ();
7038 strcat (buf, p);
7039 return buf;
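/* Illustrative results: for st(0) = st(0) + st(2) with no dying operand the
   returned template is "fadd\t{%y2, %0|%0, %y2}", while an SFmode SSE add
   yields "addss\t{%2, %0|%0, %2}".  */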
7042 /* Output code to initialize control word copies used by
7043 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
7044 is set to a control word that rounds toward zero (truncation). */
7045 void
7046 emit_i387_cw_initialization (normal, round_down)
7047 rtx normal, round_down;
7049 rtx reg = gen_reg_rtx (HImode);
7051 emit_insn (gen_x86_fnstcw_1 (normal));
7052 emit_move_insn (reg, normal);
7053 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7054 && !TARGET_64BIT)
7055 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7056 else
7057 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7058 emit_move_insn (round_down, reg);
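/* Note: both branches above set bits 10-11 of the x87 control word (the
   rounding-control field) to 11b, i.e. round toward zero, which is what the
   trunc?f?i patterns need; the saved NORMAL copy restores the old mode.  */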
7061 /* Output code for INSN to convert a float to a signed int. OPERANDS
7062 are the insn operands. The output may be [HSD]Imode and the input
7063 operand may be [SDX]Fmode. */
7065 const char *
7066 output_fix_trunc (insn, operands)
7067 rtx insn;
7068 rtx *operands;
7070 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7071 int dimode_p = GET_MODE (operands[0]) == DImode;
7073 /* Jump through a hoop or two for DImode, since the hardware has no
7074 non-popping instruction. We used to do this a different way, but
7075 that was somewhat fragile and broke with post-reload splitters. */
7076 if (dimode_p && !stack_top_dies)
7077 output_asm_insn ("fld\t%y1", operands);
7079 if (!STACK_TOP_P (operands[1]))
7080 abort ();
7082 if (GET_CODE (operands[0]) != MEM)
7083 abort ();
7085 output_asm_insn ("fldcw\t%3", operands);
7086 if (stack_top_dies || dimode_p)
7087 output_asm_insn ("fistp%z0\t%0", operands);
7088 else
7089 output_asm_insn ("fist%z0\t%0", operands);
7090 output_asm_insn ("fldcw\t%2", operands);
7092 return "";
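/* Typical emitted sequence (illustrative), for an SImode destination with
   the value on the stack top and dying:
       fldcw  %3     (switch to the truncating control word)
       fistpl %0     (store and pop)
       fldcw  %2     (restore the previous control word)  */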
7095 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7096 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7097 when fucom should be used. */
7099 const char *
7100 output_fp_compare (insn, operands, eflags_p, unordered_p)
7101 rtx insn;
7102 rtx *operands;
7103 int eflags_p, unordered_p;
7105 int stack_top_dies;
7106 rtx cmp_op0 = operands[0];
7107 rtx cmp_op1 = operands[1];
7108 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7110 if (eflags_p == 2)
7112 cmp_op0 = cmp_op1;
7113 cmp_op1 = operands[2];
7115 if (is_sse)
7117 if (GET_MODE (operands[0]) == SFmode)
7118 if (unordered_p)
7119 return "ucomiss\t{%1, %0|%0, %1}";
7120 else
7121 return "comiss\t{%1, %0|%0, %1}";
7122 else
7123 if (unordered_p)
7124 return "ucomisd\t{%1, %0|%0, %1}";
7125 else
7126 return "comisd\t{%1, %0|%0, %1}";
7129 if (! STACK_TOP_P (cmp_op0))
7130 abort ();
7132 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7134 if (STACK_REG_P (cmp_op1)
7135 && stack_top_dies
7136 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7137 && REGNO (cmp_op1) != FIRST_STACK_REG)
7139 /* If both the top of the 387 stack and the other operand (also a
7140 stack register) die, then this must be a
7141 `fcompp' float compare. */
7143 if (eflags_p == 1)
7145 /* There is no double popping fcomi variant. Fortunately,
7146 eflags is immune from the fstp's cc clobbering. */
7147 if (unordered_p)
7148 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7149 else
7150 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7151 return "fstp\t%y0";
7153 else
7155 if (eflags_p == 2)
7157 if (unordered_p)
7158 return "fucompp\n\tfnstsw\t%0";
7159 else
7160 return "fcompp\n\tfnstsw\t%0";
7162 else
7164 if (unordered_p)
7165 return "fucompp";
7166 else
7167 return "fcompp";
7171 else
7173 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7175 static const char * const alt[24] =
7177 "fcom%z1\t%y1",
7178 "fcomp%z1\t%y1",
7179 "fucom%z1\t%y1",
7180 "fucomp%z1\t%y1",
7182 "ficom%z1\t%y1",
7183 "ficomp%z1\t%y1",
7184 NULL,
7185 NULL,
7187 "fcomi\t{%y1, %0|%0, %y1}",
7188 "fcomip\t{%y1, %0|%0, %y1}",
7189 "fucomi\t{%y1, %0|%0, %y1}",
7190 "fucomip\t{%y1, %0|%0, %y1}",
7192 NULL,
7193 NULL,
7194 NULL,
7195 NULL,
7197 "fcom%z2\t%y2\n\tfnstsw\t%0",
7198 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7199 "fucom%z2\t%y2\n\tfnstsw\t%0",
7200 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7202 "ficom%z2\t%y2\n\tfnstsw\t%0",
7203 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7204 NULL,
7205 NULL
7208 int mask;
7209 const char *ret;
7211 mask = eflags_p << 3;
7212 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7213 mask |= unordered_p << 1;
7214 mask |= stack_top_dies;
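/* Example (illustrative): a dying fucomip compare -- eflags_p == 1, FP
   operand, unordered_p set, stack top dying -- gives mask == 8+2+1 == 11,
   selecting "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */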
7216 if (mask >= 24)
7217 abort ();
7218 ret = alt[mask];
7219 if (ret == NULL)
7220 abort ();
7222 return ret;
7226 void
7227 ix86_output_addr_vec_elt (file, value)
7228 FILE *file;
7229 int value;
7231 const char *directive = ASM_LONG;
7233 if (TARGET_64BIT)
7235 #ifdef ASM_QUAD
7236 directive = ASM_QUAD;
7237 #else
7238 abort ();
7239 #endif
7242 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
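/* Illustrative output: with the usual ".L" local label prefix this prints a
   jump table entry along the lines of ".long .L5" (or ".quad .L5" when
   TARGET_64BIT selects ASM_QUAD).  */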
7245 void
7246 ix86_output_addr_diff_elt (file, value, rel)
7247 FILE *file;
7248 int value, rel;
7250 if (TARGET_64BIT)
7251 fprintf (file, "%s%s%d-%s%d\n",
7252 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7253 else if (HAVE_AS_GOTOFF_IN_DATA)
7254 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7255 else
7256 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7257 ASM_LONG, LPREFIX, value);
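/* The three branches above correspond (roughly) to entries such as
   ".long .L3-.L2" on 64-bit targets, ".long .L3@GOTOFF" when the assembler
   accepts GOTOFF relocations in data, and
   ".long _GLOBAL_OFFSET_TABLE_+[.-.L3]" otherwise.  */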
7260 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7261 for the target. */
7263 void
7264 ix86_expand_clear (dest)
7265 rtx dest;
7267 rtx tmp;
7269 /* We play register width games, which are only valid after reload. */
7270 if (!reload_completed)
7271 abort ();
7273 /* Avoid HImode and its attendant prefix byte. */
7274 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7275 dest = gen_rtx_REG (SImode, REGNO (dest));
7277 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7279 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7280 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7282 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7283 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7286 emit_insn (tmp);
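/* E.g. clearing %eax normally becomes "xorl %eax, %eax" (with the flags
   clobber recorded), and "movl $0, %eax" only when TARGET_USE_MOV0 is set
   and we are not optimizing for size.  */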
7289 /* X is an unchanging MEM. If it is a constant pool reference, return
7290 the constant pool rtx, else NULL. */
7292 static rtx
7293 maybe_get_pool_constant (x)
7294 rtx x;
7296 x = XEXP (x, 0);
7298 if (flag_pic)
7300 if (GET_CODE (x) != PLUS)
7301 return NULL_RTX;
7302 if (XEXP (x, 0) != pic_offset_table_rtx)
7303 return NULL_RTX;
7304 x = XEXP (x, 1);
7305 if (GET_CODE (x) != CONST)
7306 return NULL_RTX;
7307 x = XEXP (x, 0);
7308 if (GET_CODE (x) != UNSPEC)
7309 return NULL_RTX;
7310 if (XINT (x, 1) != UNSPEC_GOTOFF)
7311 return NULL_RTX;
7312 x = XVECEXP (x, 0, 0);
7315 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7316 return get_pool_constant (x);
7318 return NULL_RTX;
7321 void
7322 ix86_expand_move (mode, operands)
7323 enum machine_mode mode;
7324 rtx operands[];
7326 int strict = (reload_in_progress || reload_completed);
7327 rtx insn, op0, op1, tmp;
7329 op0 = operands[0];
7330 op1 = operands[1];
7332 /* ??? We have a slight problem. We need to say that tls symbols are
7333 not legitimate constants so that reload does not helpfully reload
7334 these constants from a REG_EQUIV, which we cannot handle. (Recall
7335 that general- and local-dynamic address resolution requires a
7336 function call.)
7338 However, if we say that tls symbols are not legitimate constants,
7339 then emit_move_insn will helpfully drop them into the constant pool.
7341 It is far easier to work around emit_move_insn than reload. Recognize
7342 the MEM that we would have created and extract the symbol_ref. */
7344 if (mode == Pmode
7345 && GET_CODE (op1) == MEM
7346 && RTX_UNCHANGING_P (op1))
7348 tmp = maybe_get_pool_constant (op1);
7349 /* Note that we only care about symbolic constants here, which
7350 unlike CONST_INT will always have a proper mode. */
7351 if (tmp && GET_MODE (tmp) == Pmode)
7352 op1 = tmp;
7355 if (tls_symbolic_operand (op1, Pmode))
7357 op1 = legitimize_address (op1, op1, VOIDmode);
7358 if (GET_CODE (op0) == MEM)
7360 tmp = gen_reg_rtx (mode);
7361 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7362 op1 = tmp;
7365 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7367 if (GET_CODE (op0) == MEM)
7368 op1 = force_reg (Pmode, op1);
7369 else
7371 rtx temp = op0;
7372 if (GET_CODE (temp) != REG)
7373 temp = gen_reg_rtx (Pmode);
7374 temp = legitimize_pic_address (op1, temp);
7375 if (temp == op0)
7376 return;
7377 op1 = temp;
7380 else
7382 if (GET_CODE (op0) == MEM
7383 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7384 || !push_operand (op0, mode))
7385 && GET_CODE (op1) == MEM)
7386 op1 = force_reg (mode, op1);
7388 if (push_operand (op0, mode)
7389 && ! general_no_elim_operand (op1, mode))
7390 op1 = copy_to_mode_reg (mode, op1);
7392 /* Force large constants in 64bit compilation into register
7393 to get them CSEed. */
7394 if (TARGET_64BIT && mode == DImode
7395 && immediate_operand (op1, mode)
7396 && !x86_64_zero_extended_value (op1)
7397 && !register_operand (op0, mode)
7398 && optimize && !reload_completed && !reload_in_progress)
7399 op1 = copy_to_mode_reg (mode, op1);
7401 if (FLOAT_MODE_P (mode))
7403 /* If we are loading a floating point constant to a register,
7404 force the value to memory now, since we'll get better code
7405 out the back end. */
7407 if (strict)
7409 else if (GET_CODE (op1) == CONST_DOUBLE
7410 && register_operand (op0, mode))
7411 op1 = validize_mem (force_const_mem (mode, op1));
7415 insn = gen_rtx_SET (VOIDmode, op0, op1);
7417 emit_insn (insn);
7420 void
7421 ix86_expand_vector_move (mode, operands)
7422 enum machine_mode mode;
7423 rtx operands[];
7425 /* Force constants other than zero into memory. We do not know how
7426 the instructions used to build constants modify the upper 64 bits
7427 of the register; once we have that information we may be able
7428 to handle some of them more efficiently. */
7429 if ((reload_in_progress | reload_completed) == 0
7430 && register_operand (operands[0], mode)
7431 && CONSTANT_P (operands[1]))
7433 rtx addr = gen_reg_rtx (Pmode);
7434 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7435 operands[1] = gen_rtx_MEM (mode, addr);
7438 /* Make operand1 a register if it isn't already. */
7439 if ((reload_in_progress | reload_completed) == 0
7440 && !register_operand (operands[0], mode)
7441 && !register_operand (operands[1], mode)
7442 && operands[1] != CONST0_RTX (mode))
7444 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7445 emit_move_insn (operands[0], temp);
7446 return;
7449 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7452 /* Attempt to expand a binary operator. Make the expansion closer to the
7453 actual machine than just general_operand, which would allow 3 separate
7454 memory references (one output, two input) in a single insn. */
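/* Sketch of the effect (hypothetical operands): expanding PLUS for
   "a = b + a" with `a' in memory swaps the commutative sources so the memory
   operand matches the destination, forces any remaining memory or constant
   operand into a register as needed, and emits the SET together with its
   flags clobber.  */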
7456 void
7457 ix86_expand_binary_operator (code, mode, operands)
7458 enum rtx_code code;
7459 enum machine_mode mode;
7460 rtx operands[];
7462 int matching_memory;
7463 rtx src1, src2, dst, op, clob;
7465 dst = operands[0];
7466 src1 = operands[1];
7467 src2 = operands[2];
7469 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7470 if (GET_RTX_CLASS (code) == 'c'
7471 && (rtx_equal_p (dst, src2)
7472 || immediate_operand (src1, mode)))
7474 rtx temp = src1;
7475 src1 = src2;
7476 src2 = temp;
7479 /* If the destination is memory, and we do not have matching source
7480 operands, do things in registers. */
7481 matching_memory = 0;
7482 if (GET_CODE (dst) == MEM)
7484 if (rtx_equal_p (dst, src1))
7485 matching_memory = 1;
7486 else if (GET_RTX_CLASS (code) == 'c'
7487 && rtx_equal_p (dst, src2))
7488 matching_memory = 2;
7489 else
7490 dst = gen_reg_rtx (mode);
7493 /* Both source operands cannot be in memory. */
7494 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7496 if (matching_memory != 2)
7497 src2 = force_reg (mode, src2);
7498 else
7499 src1 = force_reg (mode, src1);
7502 /* If the operation is not commutable, source 1 cannot be a constant
7503 or non-matching memory. */
7504 if ((CONSTANT_P (src1)
7505 || (!matching_memory && GET_CODE (src1) == MEM))
7506 && GET_RTX_CLASS (code) != 'c')
7507 src1 = force_reg (mode, src1);
7509 /* If optimizing, copy to regs to improve CSE */
7510 if (optimize && ! no_new_pseudos)
7512 if (GET_CODE (dst) == MEM)
7513 dst = gen_reg_rtx (mode);
7514 if (GET_CODE (src1) == MEM)
7515 src1 = force_reg (mode, src1);
7516 if (GET_CODE (src2) == MEM)
7517 src2 = force_reg (mode, src2);
7520 /* Emit the instruction. */
7522 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7523 if (reload_in_progress)
7525 /* Reload doesn't know about the flags register, and doesn't know that
7526 it doesn't want to clobber it. We can only do this with PLUS. */
7527 if (code != PLUS)
7528 abort ();
7529 emit_insn (op);
7531 else
7533 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7534 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7537 /* Fix up the destination if needed. */
7538 if (dst != operands[0])
7539 emit_move_insn (operands[0], dst);
7542 /* Return TRUE or FALSE depending on whether the binary operator meets the
7543 appropriate constraints. */
7545 int
7546 ix86_binary_operator_ok (code, mode, operands)
7547 enum rtx_code code;
7548 enum machine_mode mode ATTRIBUTE_UNUSED;
7549 rtx operands[3];
7551 /* Both source operands cannot be in memory. */
7552 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7553 return 0;
7554 /* If the operation is not commutable, source 1 cannot be a constant. */
7555 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7556 return 0;
7557 /* If the destination is memory, we must have a matching source operand. */
7558 if (GET_CODE (operands[0]) == MEM
7559 && ! (rtx_equal_p (operands[0], operands[1])
7560 || (GET_RTX_CLASS (code) == 'c'
7561 && rtx_equal_p (operands[0], operands[2]))))
7562 return 0;
7563 /* If the operation is not commutable and the source 1 is memory, we must
7564 have a matching destination. */
7565 if (GET_CODE (operands[1]) == MEM
7566 && GET_RTX_CLASS (code) != 'c'
7567 && ! rtx_equal_p (operands[0], operands[1]))
7568 return 0;
7569 return 1;
7572 /* Attempt to expand a unary operator. Make the expansion closer to the
7573 actual machine than just general_operand, which would allow 2 separate
7574 memory references (one output, one input) in a single insn. */
7576 void
7577 ix86_expand_unary_operator (code, mode, operands)
7578 enum rtx_code code;
7579 enum machine_mode mode;
7580 rtx operands[];
7582 int matching_memory;
7583 rtx src, dst, op, clob;
7585 dst = operands[0];
7586 src = operands[1];
7588 /* If the destination is memory, and we do not have matching source
7589 operands, do things in registers. */
7590 matching_memory = 0;
7591 if (GET_CODE (dst) == MEM)
7593 if (rtx_equal_p (dst, src))
7594 matching_memory = 1;
7595 else
7596 dst = gen_reg_rtx (mode);
7599 /* When source operand is memory, destination must match. */
7600 if (!matching_memory && GET_CODE (src) == MEM)
7601 src = force_reg (mode, src);
7603 /* If optimizing, copy to regs to improve CSE */
7604 if (optimize && ! no_new_pseudos)
7606 if (GET_CODE (dst) == MEM)
7607 dst = gen_reg_rtx (mode);
7608 if (GET_CODE (src) == MEM)
7609 src = force_reg (mode, src);
7612 /* Emit the instruction. */
7614 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7615 if (reload_in_progress || code == NOT)
7617 /* Reload doesn't know about the flags register, and doesn't know that
7618 it doesn't want to clobber it. */
7619 if (code != NOT)
7620 abort ();
7621 emit_insn (op);
7623 else
7625 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7626 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7629 /* Fix up the destination if needed. */
7630 if (dst != operands[0])
7631 emit_move_insn (operands[0], dst);
7634 /* Return TRUE or FALSE depending on whether the unary operator meets the
7635 appropriate constraints. */
7637 int
7638 ix86_unary_operator_ok (code, mode, operands)
7639 enum rtx_code code ATTRIBUTE_UNUSED;
7640 enum machine_mode mode ATTRIBUTE_UNUSED;
7641 rtx operands[2] ATTRIBUTE_UNUSED;
7643 /* If one of operands is memory, source and destination must match. */
7644 if ((GET_CODE (operands[0]) == MEM
7645 || GET_CODE (operands[1]) == MEM)
7646 && ! rtx_equal_p (operands[0], operands[1]))
7647 return FALSE;
7648 return TRUE;
7651 /* Return TRUE or FALSE depending on whether the first SET in INSN
7652 has source and destination with matching CC modes, and that the
7653 CC mode is at least as constrained as REQ_MODE. */
7655 int
7656 ix86_match_ccmode (insn, req_mode)
7657 rtx insn;
7658 enum machine_mode req_mode;
7660 rtx set;
7661 enum machine_mode set_mode;
7663 set = PATTERN (insn);
7664 if (GET_CODE (set) == PARALLEL)
7665 set = XVECEXP (set, 0, 0);
7666 if (GET_CODE (set) != SET)
7667 abort ();
7668 if (GET_CODE (SET_SRC (set)) != COMPARE)
7669 abort ();
7671 set_mode = GET_MODE (SET_DEST (set));
7672 switch (set_mode)
7674 case CCNOmode:
7675 if (req_mode != CCNOmode
7676 && (req_mode != CCmode
7677 || XEXP (SET_SRC (set), 1) != const0_rtx))
7678 return 0;
7679 break;
7680 case CCmode:
7681 if (req_mode == CCGCmode)
7682 return 0;
7683 /* FALLTHRU */
7684 case CCGCmode:
7685 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7686 return 0;
7687 /* FALLTHRU */
7688 case CCGOCmode:
7689 if (req_mode == CCZmode)
7690 return 0;
7691 /* FALLTHRU */
7692 case CCZmode:
7693 break;
7695 default:
7696 abort ();
7699 return (GET_MODE (SET_SRC (set)) == set_mode);
7702 /* Generate insn patterns to do an integer compare of OPERANDS. */
7704 static rtx
7705 ix86_expand_int_compare (code, op0, op1)
7706 enum rtx_code code;
7707 rtx op0, op1;
7709 enum machine_mode cmpmode;
7710 rtx tmp, flags;
7712 cmpmode = SELECT_CC_MODE (code, op0, op1);
7713 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7715 /* This is very simple, but making the interface the same as in the
7716 FP case makes the rest of the code easier. */
7717 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7718 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7720 /* Return the test that should be put into the flags user, i.e.
7721 the bcc, scc, or cmov instruction. */
7722 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7725 /* Figure out whether to use ordered or unordered fp comparisons.
7726 Return the appropriate mode to use. */
7728 enum machine_mode
7729 ix86_fp_compare_mode (code)
7730 enum rtx_code code ATTRIBUTE_UNUSED;
7732 /* ??? In order to make all comparisons reversible, we do all comparisons
7733 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7734 all forms of trapping and nontrapping comparisons, we can make inequality
7735 comparisons trapping again, since it results in better code when using
7736 FCOM based compares. */
7737 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7740 enum machine_mode
7741 ix86_cc_mode (code, op0, op1)
7742 enum rtx_code code;
7743 rtx op0, op1;
7745 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7746 return ix86_fp_compare_mode (code);
7747 switch (code)
7749 /* Only zero flag is needed. */
7750 case EQ: /* ZF=0 */
7751 case NE: /* ZF!=0 */
7752 return CCZmode;
7753 /* Codes needing carry flag. */
7754 case GEU: /* CF=0 */
7755 case GTU: /* CF=0 & ZF=0 */
7756 case LTU: /* CF=1 */
7757 case LEU: /* CF=1 | ZF=1 */
7758 return CCmode;
7759 /* Codes possibly doable only with sign flag when
7760 comparing against zero. */
7761 case GE: /* SF=OF or SF=0 */
7762 case LT: /* SF<>OF or SF=1 */
7763 if (op1 == const0_rtx)
7764 return CCGOCmode;
7765 else
7766 /* For other cases Carry flag is not required. */
7767 return CCGCmode;
7768 /* Codes doable only with the sign flag when comparing
7769 against zero, but we miss the jump instruction for it
7770 so we need to use relational tests against overflow
7771 that thus needs to be zero. */
7772 case GT: /* ZF=0 & SF=OF */
7773 case LE: /* ZF=1 | SF<>OF */
7774 if (op1 == const0_rtx)
7775 return CCNOmode;
7776 else
7777 return CCGCmode;
7778 /* The strcmp pattern does (use flags), and combine may ask us for the
7779 proper mode. */
7780 case USE:
7781 return CCmode;
7782 default:
7783 abort ();
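/* Example: a GT comparison against const0_rtx selects CCNOmode, while GT
   against a general operand needs the full CCGCmode, per the cases above.  */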
7787 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7789 int
7790 ix86_use_fcomi_compare (code)
7791 enum rtx_code code ATTRIBUTE_UNUSED;
7793 enum rtx_code swapped_code = swap_condition (code);
7794 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7795 || (ix86_fp_comparison_cost (swapped_code)
7796 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7799 /* Swap, force into registers, or otherwise massage the two operands
7800 to a fp comparison. The operands are updated in place; the new
7801 comparison code is returned. */
7803 static enum rtx_code
7804 ix86_prepare_fp_compare_args (code, pop0, pop1)
7805 enum rtx_code code;
7806 rtx *pop0, *pop1;
7808 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7809 rtx op0 = *pop0, op1 = *pop1;
7810 enum machine_mode op_mode = GET_MODE (op0);
7811 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7813 /* All of the unordered compare instructions only work on registers.
7814 The same is true of the XFmode compare instructions. The same is
7815 true of the fcomi compare instructions. */
7817 if (!is_sse
7818 && (fpcmp_mode == CCFPUmode
7819 || op_mode == XFmode
7820 || op_mode == TFmode
7821 || ix86_use_fcomi_compare (code)))
7823 op0 = force_reg (op_mode, op0);
7824 op1 = force_reg (op_mode, op1);
7826 else
7828 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7829 things around if they appear profitable, otherwise force op0
7830 into a register. */
7832 if (standard_80387_constant_p (op0) == 0
7833 || (GET_CODE (op0) == MEM
7834 && ! (standard_80387_constant_p (op1) == 0
7835 || GET_CODE (op1) == MEM)))
7837 rtx tmp;
7838 tmp = op0, op0 = op1, op1 = tmp;
7839 code = swap_condition (code);
7842 if (GET_CODE (op0) != REG)
7843 op0 = force_reg (op_mode, op0);
7845 if (CONSTANT_P (op1))
7847 if (standard_80387_constant_p (op1))
7848 op1 = force_reg (op_mode, op1);
7849 else
7850 op1 = validize_mem (force_const_mem (op_mode, op1));
7854 /* Try to rearrange the comparison to make it cheaper. */
7855 if (ix86_fp_comparison_cost (code)
7856 > ix86_fp_comparison_cost (swap_condition (code))
7857 && (GET_CODE (op1) == REG || !no_new_pseudos))
7859 rtx tmp;
7860 tmp = op0, op0 = op1, op1 = tmp;
7861 code = swap_condition (code);
7862 if (GET_CODE (op0) != REG)
7863 op0 = force_reg (op_mode, op0);
7866 *pop0 = op0;
7867 *pop1 = op1;
7868 return code;
7871 /* Convert comparison codes we use to represent FP comparison to integer
7872 code that will result in proper branch. Return UNKNOWN if no such code
7873 is available. */
7874 static enum rtx_code
7875 ix86_fp_compare_code_to_integer (code)
7876 enum rtx_code code;
7878 switch (code)
7880 case GT:
7881 return GTU;
7882 case GE:
7883 return GEU;
7884 case ORDERED:
7885 case UNORDERED:
7886 return code;
7887 break;
7888 case UNEQ:
7889 return EQ;
7890 break;
7891 case UNLT:
7892 return LTU;
7893 break;
7894 case UNLE:
7895 return LEU;
7896 break;
7897 case LTGT:
7898 return NE;
7899 break;
7900 default:
7901 return UNKNOWN;
7905 /* Split comparison code CODE into comparisons we can do using branch
7906 instructions. BYPASS_CODE is the comparison code for the branch that will
7907 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7908 is not required, its value is set to NIL.
7909 We never require more than two branches. */
7910 static void
7911 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7912 enum rtx_code code, *bypass_code, *first_code, *second_code;
7914 *first_code = code;
7915 *bypass_code = NIL;
7916 *second_code = NIL;
7918 /* The fcomi comparison sets flags as follows:
7920 cmp ZF PF CF
7921 > 0 0 0
7922 < 0 0 1
7923 = 1 0 0
7924 un 1 1 1 */
7926 switch (code)
7928 case GT: /* GTU - CF=0 & ZF=0 */
7929 case GE: /* GEU - CF=0 */
7930 case ORDERED: /* PF=0 */
7931 case UNORDERED: /* PF=1 */
7932 case UNEQ: /* EQ - ZF=1 */
7933 case UNLT: /* LTU - CF=1 */
7934 case UNLE: /* LEU - CF=1 | ZF=1 */
7935 case LTGT: /* EQ - ZF=0 */
7936 break;
7937 case LT: /* LTU - CF=1 - fails on unordered */
7938 *first_code = UNLT;
7939 *bypass_code = UNORDERED;
7940 break;
7941 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7942 *first_code = UNLE;
7943 *bypass_code = UNORDERED;
7944 break;
7945 case EQ: /* EQ - ZF=1 - fails on unordered */
7946 *first_code = UNEQ;
7947 *bypass_code = UNORDERED;
7948 break;
7949 case NE: /* NE - ZF=0 - fails on unordered */
7950 *first_code = LTGT;
7951 *second_code = UNORDERED;
7952 break;
7953 case UNGE: /* GEU - CF=0 - fails on unordered */
7954 *first_code = GE;
7955 *second_code = UNORDERED;
7956 break;
7957 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7958 *first_code = GT;
7959 *second_code = UNORDERED;
7960 break;
7961 default:
7962 abort ();
7964 if (!TARGET_IEEE_FP)
7966 *second_code = NIL;
7967 *bypass_code = NIL;
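/* Example: an IEEE-conforming EQ cannot be tested with a single branch,
   because ZF is also set for unordered operands; it is therefore split into
   first_code = UNEQ with bypass_code = UNORDERED, branching around the
   equality test whenever PF reports a NaN.  */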
7971 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7972 All following functions use the number of instructions as a cost metric.
7973 In the future this should be tweaked to compute bytes for optimize_size and
7974 take into account the performance of various instructions on various CPUs. */
7975 static int
7976 ix86_fp_comparison_arithmetics_cost (code)
7977 enum rtx_code code;
7979 if (!TARGET_IEEE_FP)
7980 return 4;
7981 /* The cost of code output by ix86_expand_fp_compare. */
7982 switch (code)
7984 case UNLE:
7985 case UNLT:
7986 case LTGT:
7987 case GT:
7988 case GE:
7989 case UNORDERED:
7990 case ORDERED:
7991 case UNEQ:
7992 return 4;
7993 break;
7994 case LT:
7995 case NE:
7996 case EQ:
7997 case UNGE:
7998 return 5;
7999 break;
8000 case LE:
8001 case UNGT:
8002 return 6;
8003 break;
8004 default:
8005 abort ();
8009 /* Return cost of comparison done using fcomi operation.
8010 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8011 static int
8012 ix86_fp_comparison_fcomi_cost (code)
8013 enum rtx_code code;
8015 enum rtx_code bypass_code, first_code, second_code;
8016 /* Return an arbitrarily high cost when the instruction is not supported - this
8017 prevents gcc from using it. */
8018 if (!TARGET_CMOVE)
8019 return 1024;
8020 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8021 return (bypass_code != NIL || second_code != NIL) + 2;
8024 /* Return cost of comparison done using sahf operation.
8025 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8026 static int
8027 ix86_fp_comparison_sahf_cost (code)
8028 enum rtx_code code;
8030 enum rtx_code bypass_code, first_code, second_code;
8031 /* Return an arbitrarily high cost when the instruction is not preferred - this
8032 keeps gcc from using it. */
8033 if (!TARGET_USE_SAHF && !optimize_size)
8034 return 1024;
8035 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8036 return (bypass_code != NIL || second_code != NIL) + 3;
8039 /* Compute cost of the comparison done using any method.
8040 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8041 static int
8042 ix86_fp_comparison_cost (code)
8043 enum rtx_code code;
8045 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8046 int min;
8048 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8049 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8051 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8052 if (min > sahf_cost)
8053 min = sahf_cost;
8054 if (min > fcomi_cost)
8055 min = fcomi_cost;
8056 return min;
8059 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8061 static rtx
8062 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8063 enum rtx_code code;
8064 rtx op0, op1, scratch;
8065 rtx *second_test;
8066 rtx *bypass_test;
8068 enum machine_mode fpcmp_mode, intcmp_mode;
8069 rtx tmp, tmp2;
8070 int cost = ix86_fp_comparison_cost (code);
8071 enum rtx_code bypass_code, first_code, second_code;
8073 fpcmp_mode = ix86_fp_compare_mode (code);
8074 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8076 if (second_test)
8077 *second_test = NULL_RTX;
8078 if (bypass_test)
8079 *bypass_test = NULL_RTX;
8081 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8083 /* Do fcomi/sahf based test when profitable. */
8084 if ((bypass_code == NIL || bypass_test)
8085 && (second_code == NIL || second_test)
8086 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8088 if (TARGET_CMOVE)
8090 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8091 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8092 tmp);
8093 emit_insn (tmp);
8095 else
8097 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8098 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8099 if (!scratch)
8100 scratch = gen_reg_rtx (HImode);
8101 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8102 emit_insn (gen_x86_sahf_1 (scratch));
8105 /* The FP codes work out to act like unsigned. */
8106 intcmp_mode = fpcmp_mode;
8107 code = first_code;
8108 if (bypass_code != NIL)
8109 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8110 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8111 const0_rtx);
8112 if (second_code != NIL)
8113 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8114 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8115 const0_rtx);
8117 else
8119 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8120 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8121 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8122 if (!scratch)
8123 scratch = gen_reg_rtx (HImode);
8124 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8126 /* In the unordered case, we have to check C2 for NaNs, which
8127 doesn't work out to anything nice combination-wise.
8128 So do some bit twiddling on the value we've got in AH to come
8129 up with an appropriate set of condition codes. */
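/* After fnstsw the relevant x87 condition codes end up in AH: C0 is bit 0
   (0x01), C2 is bit 2 (0x04) and C3 is bit 6 (0x40), so a mask of 0x45
   selects C3|C2|C0.  */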
8131 intcmp_mode = CCNOmode;
8132 switch (code)
8134 case GT:
8135 case UNGT:
8136 if (code == GT || !TARGET_IEEE_FP)
8138 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8139 code = EQ;
8141 else
8143 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8144 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8145 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8146 intcmp_mode = CCmode;
8147 code = GEU;
8149 break;
8150 case LT:
8151 case UNLT:
8152 if (code == LT && TARGET_IEEE_FP)
8154 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8155 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8156 intcmp_mode = CCmode;
8157 code = EQ;
8159 else
8161 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8162 code = NE;
8164 break;
8165 case GE:
8166 case UNGE:
8167 if (code == GE || !TARGET_IEEE_FP)
8169 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8170 code = EQ;
8172 else
8174 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8175 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8176 GEN_INT (0x01)));
8177 code = NE;
8179 break;
8180 case LE:
8181 case UNLE:
8182 if (code == LE && TARGET_IEEE_FP)
8184 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8185 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8186 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8187 intcmp_mode = CCmode;
8188 code = LTU;
8190 else
8192 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8193 code = NE;
8195 break;
8196 case EQ:
8197 case UNEQ:
8198 if (code == EQ && TARGET_IEEE_FP)
8200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8201 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8202 intcmp_mode = CCmode;
8203 code = EQ;
8205 else
8207 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8208 code = NE;
8209 break;
8211 break;
8212 case NE:
8213 case LTGT:
8214 if (code == NE && TARGET_IEEE_FP)
8216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8217 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8218 GEN_INT (0x40)));
8219 code = NE;
8221 else
8223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8224 code = EQ;
8226 break;
8228 case UNORDERED:
8229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8230 code = NE;
8231 break;
8232 case ORDERED:
8233 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8234 code = EQ;
8235 break;
8237 default:
8238 abort ();
8242 /* Return the test that should be put into the flags user, i.e.
8243 the bcc, scc, or cmov instruction. */
8244 return gen_rtx_fmt_ee (code, VOIDmode,
8245 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8246 const0_rtx);
8250 ix86_expand_compare (code, second_test, bypass_test)
8251 enum rtx_code code;
8252 rtx *second_test, *bypass_test;
8254 rtx op0, op1, ret;
8255 op0 = ix86_compare_op0;
8256 op1 = ix86_compare_op1;
8258 if (second_test)
8259 *second_test = NULL_RTX;
8260 if (bypass_test)
8261 *bypass_test = NULL_RTX;
8263 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8264 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8265 second_test, bypass_test);
8266 else
8267 ret = ix86_expand_int_compare (code, op0, op1);
8269 return ret;
8272 /* Return true if CODE will result in a nontrivial jump sequence. */
8273 bool
8274 ix86_fp_jump_nontrivial_p (code)
8275 enum rtx_code code;
8277 enum rtx_code bypass_code, first_code, second_code;
8278 if (!TARGET_CMOVE)
8279 return true;
8280 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8281 return bypass_code != NIL || second_code != NIL;
8284 void
8285 ix86_expand_branch (code, label)
8286 enum rtx_code code;
8287 rtx label;
8289 rtx tmp;
8291 switch (GET_MODE (ix86_compare_op0))
8293 case QImode:
8294 case HImode:
8295 case SImode:
8296 simple:
8297 tmp = ix86_expand_compare (code, NULL, NULL);
8298 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8299 gen_rtx_LABEL_REF (VOIDmode, label),
8300 pc_rtx);
8301 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8302 return;
8304 case SFmode:
8305 case DFmode:
8306 case XFmode:
8307 case TFmode:
8309 rtvec vec;
8310 int use_fcomi;
8311 enum rtx_code bypass_code, first_code, second_code;
8313 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8314 &ix86_compare_op1);
8316 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8318 /* Check whether we will use the natural sequence with one jump. If
8319 so, we can expand the jump early. Otherwise delay expansion by
8320 creating a compound insn so as not to confuse the optimizers. */
8321 if (bypass_code == NIL && second_code == NIL
8322 && TARGET_CMOVE)
8324 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8325 gen_rtx_LABEL_REF (VOIDmode, label),
8326 pc_rtx, NULL_RTX);
8328 else
8330 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8331 ix86_compare_op0, ix86_compare_op1);
8332 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8333 gen_rtx_LABEL_REF (VOIDmode, label),
8334 pc_rtx);
8335 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8337 use_fcomi = ix86_use_fcomi_compare (code);
8338 vec = rtvec_alloc (3 + !use_fcomi);
8339 RTVEC_ELT (vec, 0) = tmp;
8340 RTVEC_ELT (vec, 1)
8341 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8342 RTVEC_ELT (vec, 2)
8343 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8344 if (! use_fcomi)
8345 RTVEC_ELT (vec, 3)
8346 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8348 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8350 return;
8353 case DImode:
8354 if (TARGET_64BIT)
8355 goto simple;
8356 /* Expand DImode branch into multiple compare+branch. */
8358 rtx lo[2], hi[2], label2;
8359 enum rtx_code code1, code2, code3;
8361 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8363 tmp = ix86_compare_op0;
8364 ix86_compare_op0 = ix86_compare_op1;
8365 ix86_compare_op1 = tmp;
8366 code = swap_condition (code);
8368 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8369 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8371 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8372 avoid two branches. This costs one extra insn, so disable when
8373 optimizing for size. */
8375 if ((code == EQ || code == NE)
8376 && (!optimize_size
8377 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8379 rtx xor0, xor1;
8381 xor1 = hi[0];
8382 if (hi[1] != const0_rtx)
8383 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8384 NULL_RTX, 0, OPTAB_WIDEN);
8386 xor0 = lo[0];
8387 if (lo[1] != const0_rtx)
8388 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8389 NULL_RTX, 0, OPTAB_WIDEN);
8391 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8392 NULL_RTX, 0, OPTAB_WIDEN);
8394 ix86_compare_op0 = tmp;
8395 ix86_compare_op1 = const0_rtx;
8396 ix86_expand_branch (code, label);
8397 return;
8400 /* Otherwise, if we are doing a less-than or greater-or-equal-than
8401 comparison, op1 is a constant, and the low word is zero, then we
8402 can just examine the high word. */
8404 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8405 switch (code)
8407 case LT: case LTU: case GE: case GEU:
8408 ix86_compare_op0 = hi[0];
8409 ix86_compare_op1 = hi[1];
8410 ix86_expand_branch (code, label);
8411 return;
8412 default:
8413 break;
8416 /* Otherwise, we need two or three jumps. */
8418 label2 = gen_label_rtx ();
8420 code1 = code;
8421 code2 = swap_condition (code);
8422 code3 = unsigned_condition (code);
8424 switch (code)
8426 case LT: case GT: case LTU: case GTU:
8427 break;
8429 case LE: code1 = LT; code2 = GT; break;
8430 case GE: code1 = GT; code2 = LT; break;
8431 case LEU: code1 = LTU; code2 = GTU; break;
8432 case GEU: code1 = GTU; code2 = LTU; break;
8434 case EQ: code1 = NIL; code2 = NE; break;
8435 case NE: code2 = NIL; break;
8437 default:
8438 abort ();
8442 * a < b =>
8443 * if (hi(a) < hi(b)) goto true;
8444 * if (hi(a) > hi(b)) goto false;
8445 * if (lo(a) < lo(b)) goto true;
8446 * false:
8449 ix86_compare_op0 = hi[0];
8450 ix86_compare_op1 = hi[1];
8452 if (code1 != NIL)
8453 ix86_expand_branch (code1, label);
8454 if (code2 != NIL)
8455 ix86_expand_branch (code2, label2);
8457 ix86_compare_op0 = lo[0];
8458 ix86_compare_op1 = lo[1];
8459 ix86_expand_branch (code3, label);
8461 if (code2 != NIL)
8462 emit_label (label2);
8463 return;
8466 default:
8467 abort ();
8471 /* Split branch based on floating point condition. */
8472 void
8473 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8474 enum rtx_code code;
8475 rtx op1, op2, target1, target2, tmp;
8477 rtx second, bypass;
8478 rtx label = NULL_RTX;
8479 rtx condition;
8480 int bypass_probability = -1, second_probability = -1, probability = -1;
8481 rtx i;
8483 if (target2 != pc_rtx)
8485 rtx tmp = target2;
8486 code = reverse_condition_maybe_unordered (code);
8487 target2 = target1;
8488 target1 = tmp;
8491 condition = ix86_expand_fp_compare (code, op1, op2,
8492 tmp, &second, &bypass);
8494 if (split_branch_probability >= 0)
8496 /* Distribute the probabilities across the jumps.
8497 Assume that BYPASS and SECOND always test
8498 for UNORDERED. */
8499 probability = split_branch_probability;
8501 /* A value of 1 is low enough that there is no need to update the
8502 probability. Later we may run some experiments and see
8503 whether unordered values are more frequent in practice. */
8504 if (bypass)
8505 bypass_probability = 1;
8506 if (second)
8507 second_probability = 1;
8509 if (bypass != NULL_RTX)
8511 label = gen_label_rtx ();
8512 i = emit_jump_insn (gen_rtx_SET
8513 (VOIDmode, pc_rtx,
8514 gen_rtx_IF_THEN_ELSE (VOIDmode,
8515 bypass,
8516 gen_rtx_LABEL_REF (VOIDmode,
8517 label),
8518 pc_rtx)));
8519 if (bypass_probability >= 0)
8520 REG_NOTES (i)
8521 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8522 GEN_INT (bypass_probability),
8523 REG_NOTES (i));
8525 i = emit_jump_insn (gen_rtx_SET
8526 (VOIDmode, pc_rtx,
8527 gen_rtx_IF_THEN_ELSE (VOIDmode,
8528 condition, target1, target2)));
8529 if (probability >= 0)
8530 REG_NOTES (i)
8531 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8532 GEN_INT (probability),
8533 REG_NOTES (i));
8534 if (second != NULL_RTX)
8536 i = emit_jump_insn (gen_rtx_SET
8537 (VOIDmode, pc_rtx,
8538 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8539 target2)));
8540 if (second_probability >= 0)
8541 REG_NOTES (i)
8542 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8543 GEN_INT (second_probability),
8544 REG_NOTES (i));
8546 if (label != NULL_RTX)
8547 emit_label (label);
8551 ix86_expand_setcc (code, dest)
8552 enum rtx_code code;
8553 rtx dest;
8555 rtx ret, tmp, tmpreg;
8556 rtx second_test, bypass_test;
8558 if (GET_MODE (ix86_compare_op0) == DImode
8559 && !TARGET_64BIT)
8560 return 0; /* FAIL */
8562 if (GET_MODE (dest) != QImode)
8563 abort ();
8565 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8566 PUT_MODE (ret, QImode);
8568 tmp = dest;
8569 tmpreg = dest;
8571 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
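/* A bypass test is reversed and ANDed into the result (forcing it to zero
   when the bypass condition holds), while a second test is simply ORed in.  */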
8572 if (bypass_test || second_test)
8574 rtx test = second_test;
8575 int bypass = 0;
8576 rtx tmp2 = gen_reg_rtx (QImode);
8577 if (bypass_test)
8579 if (second_test)
8580 abort ();
8581 test = bypass_test;
8582 bypass = 1;
8583 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8585 PUT_MODE (test, QImode);
8586 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8588 if (bypass)
8589 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8590 else
8591 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8594 return 1; /* DONE */
8598 ix86_expand_int_movcc (operands)
8599 rtx operands[];
8601 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8602 rtx compare_seq, compare_op;
8603 rtx second_test, bypass_test;
8604 enum machine_mode mode = GET_MODE (operands[0]);
8606 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8607 In case the comparison is done with an immediate, we can convert it to LTU or
8608 GEU by adjusting the constant. */
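/* For example, an unsigned (x <= 41) becomes (x < 42), which the sbb
   sequence below handles directly.  */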
8610 if ((code == LEU || code == GTU)
8611 && GET_CODE (ix86_compare_op1) == CONST_INT
8612 && mode != HImode
8613 && INTVAL (ix86_compare_op1) != -1
8614 /* For x86-64, the immediate field in the instruction is 32-bit
8615 signed, so we can't increment a DImode value above 0x7fffffff. */
8616 && (!TARGET_64BIT
8617 || GET_MODE (ix86_compare_op0) != DImode
8618 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8619 && GET_CODE (operands[2]) == CONST_INT
8620 && GET_CODE (operands[3]) == CONST_INT)
8622 if (code == LEU)
8623 code = LTU;
8624 else
8625 code = GEU;
8626 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8627 GET_MODE (ix86_compare_op0));
8630 start_sequence ();
8631 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8632 compare_seq = get_insns ();
8633 end_sequence ();
8635 compare_code = GET_CODE (compare_op);
8637 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8638 HImode insns, we'd be swallowed in word prefix ops. */
8640 if (mode != HImode
8641 && (mode != DImode || TARGET_64BIT)
8642 && GET_CODE (operands[2]) == CONST_INT
8643 && GET_CODE (operands[3]) == CONST_INT)
8645 rtx out = operands[0];
8646 HOST_WIDE_INT ct = INTVAL (operands[2]);
8647 HOST_WIDE_INT cf = INTVAL (operands[3]);
8648 HOST_WIDE_INT diff;
8650 if ((compare_code == LTU || compare_code == GEU)
8651 && !second_test && !bypass_test)
8654 /* Detect overlap between destination and compare sources. */
8655 rtx tmp = out;
8657 /* To simplify the rest of the code, restrict to the GEU case. */
8658 if (compare_code == LTU)
8660 int tmp = ct;
8661 ct = cf;
8662 cf = tmp;
8663 compare_code = reverse_condition (compare_code);
8664 code = reverse_condition (code);
8666 diff = ct - cf;
8668 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8669 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8670 tmp = gen_reg_rtx (mode);
8672 emit_insn (compare_seq);
8673 if (mode == DImode)
8674 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8675 else
8676 emit_insn (gen_x86_movsicc_0_m1 (tmp));
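/* The sbb emitted above sets TMP to all ones when the compare set the carry
   flag (the LTU case, i.e. the reversed condition) and to zero otherwise;
   the arithmetic below turns this -1/0 mask into the cf/ct values.  */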
8678 if (diff == 1)
8681 * cmpl op0,op1
8682 * sbbl dest,dest
8683 * [addl dest, ct]
8685 * Size 5 - 8.
8687 if (ct)
8688 tmp = expand_simple_binop (mode, PLUS,
8689 tmp, GEN_INT (ct),
8690 tmp, 1, OPTAB_DIRECT);
8692 else if (cf == -1)
8695 * cmpl op0,op1
8696 * sbbl dest,dest
8697 * orl $ct, dest
8699 * Size 8.
8701 tmp = expand_simple_binop (mode, IOR,
8702 tmp, GEN_INT (ct),
8703 tmp, 1, OPTAB_DIRECT);
8705 else if (diff == -1 && ct)
8708 * cmpl op0,op1
8709 * sbbl dest,dest
8710 * xorl $-1, dest
8711 * [addl dest, cf]
8713 * Size 8 - 11.
8715 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8716 if (cf)
8717 tmp = expand_simple_binop (mode, PLUS,
8718 tmp, GEN_INT (cf),
8719 tmp, 1, OPTAB_DIRECT);
8721 else
8724 * cmpl op0,op1
8725 * sbbl dest,dest
8726 * andl cf - ct, dest
8727 * [addl dest, ct]
8729 * Size 8 - 11.
8731 tmp = expand_simple_binop (mode, AND,
8732 tmp,
8733 gen_int_mode (cf - ct, mode),
8734 tmp, 1, OPTAB_DIRECT);
8735 if (ct)
8736 tmp = expand_simple_binop (mode, PLUS,
8737 tmp, GEN_INT (ct),
8738 tmp, 1, OPTAB_DIRECT);
8741 if (tmp != out)
8742 emit_move_insn (out, tmp);
8744 return 1; /* DONE */
8747 diff = ct - cf;
8748 if (diff < 0)
8750 HOST_WIDE_INT tmp;
8751 tmp = ct, ct = cf, cf = tmp;
8752 diff = -diff;
8753 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8755 /* We may be reversing an unordered compare to a normal compare, which
8756 is not valid in general (we may convert a non-trapping condition
8757 into a trapping one); however, on i386 we currently emit all
8758 comparisons unordered. */
8759 compare_code = reverse_condition_maybe_unordered (compare_code);
8760 code = reverse_condition_maybe_unordered (code);
8762 else
8764 compare_code = reverse_condition (compare_code);
8765 code = reverse_condition (code);
8769 compare_code = NIL;
8770 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8771 && GET_CODE (ix86_compare_op1) == CONST_INT)
8773 if (ix86_compare_op1 == const0_rtx
8774 && (code == LT || code == GE))
8775 compare_code = code;
8776 else if (ix86_compare_op1 == constm1_rtx)
8778 if (code == LE)
8779 compare_code = LT;
8780 else if (code == GT)
8781 compare_code = GE;
8785 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8786 if (compare_code != NIL
8787 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8788 && (cf == -1 || ct == -1))
8790 /* If the lea code below could be used, only optimize
8791 if it results in a 2-insn sequence. */
8793 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8794 || diff == 3 || diff == 5 || diff == 9)
8795 || (compare_code == LT && ct == -1)
8796 || (compare_code == GE && cf == -1))
8799 * notl op1 (if necessary)
8800 * sarl $31, op1
8801 * orl cf, op1
8803 if (ct != -1)
8805 cf = ct;
8806 ct = -1;
8807 code = reverse_condition (code);
8810 out = emit_store_flag (out, code, ix86_compare_op0,
8811 ix86_compare_op1, VOIDmode, 0, -1);
8813 out = expand_simple_binop (mode, IOR,
8814 out, GEN_INT (cf),
8815 out, 1, OPTAB_DIRECT);
8816 if (out != operands[0])
8817 emit_move_insn (operands[0], out);
8819 return 1; /* DONE */
8823 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8824 || diff == 3 || diff == 5 || diff == 9)
8825 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8828 * xorl dest,dest
8829 * cmpl op1,op2
8830 * setcc dest
8831 * lea cf(dest*(ct-cf)),dest
8833 * Size 14.
8835 * This also catches the degenerate setcc-only case.
8838 rtx tmp;
8839 int nops;
8841 out = emit_store_flag (out, code, ix86_compare_op0,
8842 ix86_compare_op1, VOIDmode, 0, 1);
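/* OUT now holds 0 or 1; the address expression built below computes
   cf + out * (ct - cf), so a single lea (or add) selects between the two
   constants without a branch.  */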
8844 nops = 0;
8845 /* On x86_64 the lea instruction operates on Pmode, so we need to do the
8846 arithmetic in the proper mode to match. */
8847 if (diff == 1)
8848 tmp = out;
8849 else
8851 rtx out1;
8852 out1 = out;
8853 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8854 nops++;
8855 if (diff & 1)
8857 tmp = gen_rtx_PLUS (mode, tmp, out1);
8858 nops++;
8861 if (cf != 0)
8863 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8864 nops++;
8866 if (tmp != out
8867 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8869 if (nops == 1)
8871 rtx clob;
8873 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8874 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8876 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8877 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8878 emit_insn (tmp);
8880 else
8881 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8883 if (out != operands[0])
8884 emit_move_insn (operands[0], out);
8886 return 1; /* DONE */
8890 * General case: Jumpful:
8891 * xorl dest,dest cmpl op1, op2
8892 * cmpl op1, op2 movl ct, dest
8893 * setcc dest jcc 1f
8894 * decl dest movl cf, dest
8895 * andl (cf-ct),dest 1:
8896 * addl ct,dest
8898 * Size 20. Size 14.
8900 * This is reasonably steep, but branch mispredict costs are
8901 * high on modern cpus, so consider failing only if optimizing
8902 * for space.
8904 * %%% Parameterize branch_cost on the tuning architecture, then
8905 * use that. The 80386 couldn't care less about mispredicts.
8908 if (!optimize_size && !TARGET_CMOVE)
8910 if (ct == 0)
8912 ct = cf;
8913 cf = 0;
8914 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8915 /* We may be reversing an unordered compare to a normal compare,
8916 which is not valid in general (we may convert a non-trapping
8917 condition into a trapping one); however, on i386 we currently
8918 emit all comparisons unordered. */
8919 code = reverse_condition_maybe_unordered (code);
8920 else
8922 code = reverse_condition (code);
8923 if (compare_code != NIL)
8924 compare_code = reverse_condition (compare_code);
8928 if (compare_code != NIL)
8930 /* notl op1 (if needed)
8931 sarl $31, op1
8932 andl (cf-ct), op1
8933 addl ct, op1
8935 For x < 0 (resp. x <= -1) there will be no notl,
8936 so if possible swap the constants to get rid of the
8937 complement.
8938 True/false will be -1/0 while code below (store flag
8939 followed by decrement) is 0/-1, so the constants need
8940 to be exchanged once more. */
8942 if (compare_code == GE || !cf)
8944 code = reverse_condition (code);
8945 compare_code = LT;
8947 else
8949 HOST_WIDE_INT tmp = cf;
8950 cf = ct;
8951 ct = tmp;
8954 out = emit_store_flag (out, code, ix86_compare_op0,
8955 ix86_compare_op1, VOIDmode, 0, -1);
8957 else
8959 out = emit_store_flag (out, code, ix86_compare_op0,
8960 ix86_compare_op1, VOIDmode, 0, 1);
8962 out = expand_simple_binop (mode, PLUS,
8963 out, constm1_rtx,
8964 out, 1, OPTAB_DIRECT);
8967 out = expand_simple_binop (mode, AND,
8968 out,
8969 gen_int_mode (cf - ct, mode),
8970 out, 1, OPTAB_DIRECT);
8971 out = expand_simple_binop (mode, PLUS,
8972 out, GEN_INT (ct),
8973 out, 1, OPTAB_DIRECT);
8974 if (out != operands[0])
8975 emit_move_insn (operands[0], out);
8977 return 1; /* DONE */
8981 if (!TARGET_CMOVE)
8983 /* Try a few more things with specific constants and a variable. */
8985 optab op;
8986 rtx var, orig_out, out, tmp;
8988 if (optimize_size)
8989 return 0; /* FAIL */
8991 /* If one of the two operands is an interesting constant (0 or -1), recurse
8992 to load a 0/-1 mask and combine the variable operand with it using a logical operation. */
8994 if (GET_CODE (operands[2]) == CONST_INT)
8996 var = operands[3];
8997 if (INTVAL (operands[2]) == 0)
8998 operands[3] = constm1_rtx, op = and_optab;
8999 else if (INTVAL (operands[2]) == -1)
9000 operands[3] = const0_rtx, op = ior_optab;
9001 else
9002 return 0; /* FAIL */
9004 else if (GET_CODE (operands[3]) == CONST_INT)
9006 var = operands[2];
9007 if (INTVAL (operands[3]) == 0)
9008 operands[2] = constm1_rtx, op = and_optab;
9009 else if (INTVAL (operands[3]) == -1)
9010 operands[2] = const0_rtx, op = ior_optab;
9011 else
9012 return 0; /* FAIL */
9014 else
9015 return 0; /* FAIL */
9017 orig_out = operands[0];
9018 tmp = gen_reg_rtx (mode);
9019 operands[0] = tmp;
9021 /* Recurse to get the constant loaded. */
9022 if (ix86_expand_int_movcc (operands) == 0)
9023 return 0; /* FAIL */
9025 /* Mask in the interesting variable. */
9026 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9027 OPTAB_WIDEN);
9028 if (out != orig_out)
9029 emit_move_insn (orig_out, out);
9031 return 1; /* DONE */
9035 * For comparison with above,
9037 * movl cf,dest
9038 * movl ct,tmp
9039 * cmpl op1,op2
9040 * cmovcc tmp,dest
9042 * Size 15.
9045 if (! nonimmediate_operand (operands[2], mode))
9046 operands[2] = force_reg (mode, operands[2]);
9047 if (! nonimmediate_operand (operands[3], mode))
9048 operands[3] = force_reg (mode, operands[3]);
9050 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9052 rtx tmp = gen_reg_rtx (mode);
9053 emit_move_insn (tmp, operands[3]);
9054 operands[3] = tmp;
9056 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9058 rtx tmp = gen_reg_rtx (mode);
9059 emit_move_insn (tmp, operands[2]);
9060 operands[2] = tmp;
9062 if (! register_operand (operands[2], VOIDmode)
9063 && ! register_operand (operands[3], VOIDmode))
9064 operands[2] = force_reg (mode, operands[2]);
9066 emit_insn (compare_seq);
9067 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9068 gen_rtx_IF_THEN_ELSE (mode,
9069 compare_op, operands[2],
9070 operands[3])));
9071 if (bypass_test)
9072 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9073 gen_rtx_IF_THEN_ELSE (mode,
9074 bypass_test,
9075 operands[3],
9076 operands[0])));
9077 if (second_test)
9078 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9079 gen_rtx_IF_THEN_ELSE (mode,
9080 second_test,
9081 operands[2],
9082 operands[0])));
9084 return 1; /* DONE */
9088 ix86_expand_fp_movcc (operands)
9089 rtx operands[];
9091 enum rtx_code code;
9092 rtx tmp;
9093 rtx compare_op, second_test, bypass_test;
9095 /* For SF/DFmode conditional moves based on comparisons
9096 in the same mode, we may want to use SSE min/max instructions. */
9097 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9098 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9099 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9100 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9101 && (!TARGET_IEEE_FP
9102 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9103 /* We may be called from the post-reload splitter. */
9104 && (!REG_P (operands[0])
9105 || SSE_REG_P (operands[0])
9106 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9108 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9109 code = GET_CODE (operands[1]);
9111 /* See if we have a (cross) match between the comparison operands and the
9112 conditional move operands. */
9113 if (rtx_equal_p (operands[2], op1))
9115 rtx tmp = op0;
9116 op0 = op1;
9117 op1 = tmp;
9118 code = reverse_condition_maybe_unordered (code);
9120 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9122 /* Check for min operation. */
9123 if (code == LT)
9125 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9126 if (memory_operand (op0, VOIDmode))
9127 op0 = force_reg (GET_MODE (operands[0]), op0);
9128 if (GET_MODE (operands[0]) == SFmode)
9129 emit_insn (gen_minsf3 (operands[0], op0, op1));
9130 else
9131 emit_insn (gen_mindf3 (operands[0], op0, op1));
9132 return 1;
9134 /* Check for max operation. */
9135 if (code == GT)
9137 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9138 if (memory_operand (op0, VOIDmode))
9139 op0 = force_reg (GET_MODE (operands[0]), op0);
9140 if (GET_MODE (operands[0]) == SFmode)
9141 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9142 else
9143 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9144 return 1;
9147 /* Arrange for the condition to be an sse_comparison_operator. In case we are
9148 in non-IEEE mode, try to canonicalize the destination operand
9149 to be first in the comparison - this helps reload to avoid extra
9150 moves. */
9151 if (!sse_comparison_operator (operands[1], VOIDmode)
9152 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9154 rtx tmp = ix86_compare_op0;
9155 ix86_compare_op0 = ix86_compare_op1;
9156 ix86_compare_op1 = tmp;
9157 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9158 VOIDmode, ix86_compare_op0,
9159 ix86_compare_op1);
9161 /* Similarly, try to arrange for the result to be the first operand of the
9162 conditional move. We also don't support the NE comparison on SSE, so try to
9163 avoid it. */
9164 if ((rtx_equal_p (operands[0], operands[3])
9165 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9166 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9168 rtx tmp = operands[2];
9169 operands[2] = operands[3];
9170 operands[3] = tmp;
9171 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9172 (GET_CODE (operands[1])),
9173 VOIDmode, ix86_compare_op0,
9174 ix86_compare_op1);
9176 if (GET_MODE (operands[0]) == SFmode)
9177 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9178 operands[2], operands[3],
9179 ix86_compare_op0, ix86_compare_op1));
9180 else
9181 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9182 operands[2], operands[3],
9183 ix86_compare_op0, ix86_compare_op1));
9184 return 1;
9187 /* The floating point conditional move instructions don't directly
9188 support conditions resulting from a signed integer comparison. */
9190 code = GET_CODE (operands[1]);
9191 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9196 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9198 if (second_test != NULL || bypass_test != NULL)
9199 abort ();
9200 tmp = gen_reg_rtx (QImode);
9201 ix86_expand_setcc (code, tmp);
9202 code = NE;
9203 ix86_compare_op0 = tmp;
9204 ix86_compare_op1 = const0_rtx;
9205 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9207 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9209 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9210 emit_move_insn (tmp, operands[3]);
9211 operands[3] = tmp;
9213 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9215 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9216 emit_move_insn (tmp, operands[2]);
9217 operands[2] = tmp;
9220 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9221 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9222 compare_op,
9223 operands[2],
9224 operands[3])));
9225 if (bypass_test)
9226 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9227 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9228 bypass_test,
9229 operands[3],
9230 operands[0])));
9231 if (second_test)
9232 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9233 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9234 second_test,
9235 operands[2],
9236 operands[0])));
9238 return 1;
9241 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9242 works for floating point parameters and non-offsettable memories.
9243 For pushes, it returns just stack offsets; the values will be saved
9244 in the right order. At most three parts are generated. */
9246 static int
9247 ix86_split_to_parts (operand, parts, mode)
9248 rtx operand;
9249 rtx *parts;
9250 enum machine_mode mode;
9252 int size;
9254 if (!TARGET_64BIT)
9255 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9256 else
9257 size = (GET_MODE_SIZE (mode) + 4) / 8;
9259 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9260 abort ();
9261 if (size < 2 || size > 3)
9262 abort ();
9264 /* Optimize constant pool references into immediates. This is used by fp
9265 moves, which force all constants to memory to allow combining. */
9266 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9268 rtx tmp = maybe_get_pool_constant (operand);
9269 if (tmp)
9270 operand = tmp;
9273 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9275 /* The only non-offsettable memories we handle are pushes. */
9276 if (! push_operand (operand, VOIDmode))
9277 abort ();
9279 operand = copy_rtx (operand);
9280 PUT_MODE (operand, Pmode);
9281 parts[0] = parts[1] = parts[2] = operand;
9283 else if (!TARGET_64BIT)
9285 if (mode == DImode)
9286 split_di (&operand, 1, &parts[0], &parts[1]);
9287 else
9289 if (REG_P (operand))
9291 if (!reload_completed)
9292 abort ();
9293 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9294 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9295 if (size == 3)
9296 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9298 else if (offsettable_memref_p (operand))
9300 operand = adjust_address (operand, SImode, 0);
9301 parts[0] = operand;
9302 parts[1] = adjust_address (operand, SImode, 4);
9303 if (size == 3)
9304 parts[2] = adjust_address (operand, SImode, 8);
9306 else if (GET_CODE (operand) == CONST_DOUBLE)
9308 REAL_VALUE_TYPE r;
9309 long l[4];
9311 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9312 switch (mode)
9314 case XFmode:
9315 case TFmode:
9316 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9317 parts[2] = gen_int_mode (l[2], SImode);
9318 break;
9319 case DFmode:
9320 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9321 break;
9322 default:
9323 abort ();
9325 parts[1] = gen_int_mode (l[1], SImode);
9326 parts[0] = gen_int_mode (l[0], SImode);
9328 else
9329 abort ();
9332 else
9334 if (mode == TImode)
9335 split_ti (&operand, 1, &parts[0], &parts[1]);
9336 if (mode == XFmode || mode == TFmode)
9338 if (REG_P (operand))
9340 if (!reload_completed)
9341 abort ();
9342 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9343 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9345 else if (offsettable_memref_p (operand))
9347 operand = adjust_address (operand, DImode, 0);
9348 parts[0] = operand;
9349 parts[1] = adjust_address (operand, SImode, 8);
9351 else if (GET_CODE (operand) == CONST_DOUBLE)
9353 REAL_VALUE_TYPE r;
9354 long l[3];
9356 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9357 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9358 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9359 if (HOST_BITS_PER_WIDE_INT >= 64)
9360 parts[0]
9361 = gen_int_mode
9362 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9363 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9364 DImode);
9365 else
9366 parts[0] = immed_double_const (l[0], l[1], DImode);
9367 parts[1] = gen_int_mode (l[2], SImode);
9369 else
9370 abort ();
9374 return size;
9377 /* Emit insns to perform a move or push of DI, DF, and XF values.
9378 All required insns are emitted here; the caller need not emit
9379 any further moves. Operands 2-4 contain the input values
9380 in the correct order; operands 5-7 contain the output values. */
9382 void
9383 ix86_split_long_move (operands)
9384 rtx operands[];
9386 rtx part[2][3];
9387 int nparts;
9388 int push = 0;
9389 int collisions = 0;
9390 enum machine_mode mode = GET_MODE (operands[0]);
9392 /* The DFmode expanders may ask us to move a double.
9393 For a 64-bit target this is a single move. By hiding that fact
9394 here we simplify the i386.md splitters. */
9395 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9397 /* Optimize constant pool references into immediates. This is used by
9398 fp moves, which force all constants to memory to allow combining. */
9400 if (GET_CODE (operands[1]) == MEM
9401 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9402 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9403 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9404 if (push_operand (operands[0], VOIDmode))
9406 operands[0] = copy_rtx (operands[0]);
9407 PUT_MODE (operands[0], Pmode);
9409 else
9410 operands[0] = gen_lowpart (DImode, operands[0]);
9411 operands[1] = gen_lowpart (DImode, operands[1]);
9412 emit_move_insn (operands[0], operands[1]);
9413 return;
9416 /* The only non-offsettable memory we handle is a push. */
9417 if (push_operand (operands[0], VOIDmode))
9418 push = 1;
9419 else if (GET_CODE (operands[0]) == MEM
9420 && ! offsettable_memref_p (operands[0]))
9421 abort ();
9423 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9424 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9426 /* When emitting a push, take care of source operands on the stack. */
9427 if (push && GET_CODE (operands[1]) == MEM
9428 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9430 if (nparts == 3)
9431 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9432 XEXP (part[1][2], 0));
9433 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9434 XEXP (part[1][1], 0));
9437 /* We need to do the copy in the right order in case an address register
9438 of the source overlaps the destination. */
9439 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9441 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9442 collisions++;
9443 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9444 collisions++;
9445 if (nparts == 3
9446 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9447 collisions++;
9449 /* Collision in the middle part can be handled by reordering. */
9450 if (collisions == 1 && nparts == 3
9451 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9453 rtx tmp;
9454 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9455 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9458 /* If there are more collisions, we can't handle them by reordering.
9459 Do an lea to the last part and use only one colliding move. */
9460 else if (collisions > 1)
9462 collisions = 1;
9463 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9464 XEXP (part[1][0], 0)));
9465 part[1][0] = change_address (part[1][0],
9466 TARGET_64BIT ? DImode : SImode,
9467 part[0][nparts - 1]);
9468 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9469 if (nparts == 3)
9470 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9474 if (push)
9476 if (!TARGET_64BIT)
9478 if (nparts == 3)
9480 /* We use only the first 12 bytes of the TFmode value, but for pushing we
9481 are required to adjust the stack as if we were pushing a real 16-byte
9482 value. */
9483 if (mode == TFmode && !TARGET_64BIT)
9484 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9485 GEN_INT (-4)));
9486 emit_move_insn (part[0][2], part[1][2]);
9489 else
9491 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
9492 register, that is OK - we will just use the larger counterpart. We also
9493 retype memories - these come from an attempt to avoid a REX prefix on
9494 moving the second half of a TFmode value. */
9495 if (GET_MODE (part[1][1]) == SImode)
9497 if (GET_CODE (part[1][1]) == MEM)
9498 part[1][1] = adjust_address (part[1][1], DImode, 0);
9499 else if (REG_P (part[1][1]))
9500 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9501 else
9502 abort ();
9503 if (GET_MODE (part[1][0]) == SImode)
9504 part[1][0] = part[1][1];
9507 emit_move_insn (part[0][1], part[1][1]);
9508 emit_move_insn (part[0][0], part[1][0]);
9509 return;
9512 /* Choose the correct order so as not to overwrite the source before it is copied. */
9513 if ((REG_P (part[0][0])
9514 && REG_P (part[1][1])
9515 && (REGNO (part[0][0]) == REGNO (part[1][1])
9516 || (nparts == 3
9517 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9518 || (collisions > 0
9519 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9521 if (nparts == 3)
9523 operands[2] = part[0][2];
9524 operands[3] = part[0][1];
9525 operands[4] = part[0][0];
9526 operands[5] = part[1][2];
9527 operands[6] = part[1][1];
9528 operands[7] = part[1][0];
9530 else
9532 operands[2] = part[0][1];
9533 operands[3] = part[0][0];
9534 operands[5] = part[1][1];
9535 operands[6] = part[1][0];
9538 else
9540 if (nparts == 3)
9542 operands[2] = part[0][0];
9543 operands[3] = part[0][1];
9544 operands[4] = part[0][2];
9545 operands[5] = part[1][0];
9546 operands[6] = part[1][1];
9547 operands[7] = part[1][2];
9549 else
9551 operands[2] = part[0][0];
9552 operands[3] = part[0][1];
9553 operands[5] = part[1][0];
9554 operands[6] = part[1][1];
9557 emit_move_insn (operands[2], operands[5]);
9558 emit_move_insn (operands[3], operands[6]);
9559 if (nparts == 3)
9560 emit_move_insn (operands[4], operands[7]);
9562 return;
9565 void
9566 ix86_split_ashldi (operands, scratch)
9567 rtx *operands, scratch;
9569 rtx low[2], high[2];
9570 int count;
9572 if (GET_CODE (operands[2]) == CONST_INT)
9574 split_di (operands, 2, low, high);
9575 count = INTVAL (operands[2]) & 63;
9577 if (count >= 32)
9579 emit_move_insn (high[0], low[1]);
9580 emit_move_insn (low[0], const0_rtx);
9582 if (count > 32)
9583 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9585 else
9587 if (!rtx_equal_p (operands[0], operands[1]))
9588 emit_move_insn (operands[0], operands[1]);
9589 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9590 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9593 else
9595 if (!rtx_equal_p (operands[0], operands[1]))
9596 emit_move_insn (operands[0], operands[1]);
9598 split_di (operands, 1, low, high);
9600 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9601 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
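/* The shld/sal pair above shifts by the count modulo 32.  When the variable
   count has bit 5 set (count >= 32), the adjustment patterns below move the
   already-shifted low word into the high word and clear the low word, using
   cmov when available and a conditional jump otherwise.  */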
9603 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9605 if (! no_new_pseudos)
9606 scratch = force_reg (SImode, const0_rtx);
9607 else
9608 emit_move_insn (scratch, const0_rtx);
9610 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9611 scratch));
9613 else
9614 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9618 void
9619 ix86_split_ashrdi (operands, scratch)
9620 rtx *operands, scratch;
9622 rtx low[2], high[2];
9623 int count;
9625 if (GET_CODE (operands[2]) == CONST_INT)
9627 split_di (operands, 2, low, high);
9628 count = INTVAL (operands[2]) & 63;
9630 if (count >= 32)
9632 emit_move_insn (low[0], high[1]);
9634 if (! reload_completed)
9635 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9636 else
9638 emit_move_insn (high[0], low[0]);
9639 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9642 if (count > 32)
9643 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9645 else
9647 if (!rtx_equal_p (operands[0], operands[1]))
9648 emit_move_insn (operands[0], operands[1]);
9649 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9650 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9653 else
9655 if (!rtx_equal_p (operands[0], operands[1]))
9656 emit_move_insn (operands[0], operands[1]);
9658 split_di (operands, 1, low, high);
9660 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9661 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9663 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9665 if (! no_new_pseudos)
9666 scratch = gen_reg_rtx (SImode);
9667 emit_move_insn (scratch, high[0]);
9668 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9669 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9670 scratch));
9672 else
9673 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9677 void
9678 ix86_split_lshrdi (operands, scratch)
9679 rtx *operands, scratch;
9681 rtx low[2], high[2];
9682 int count;
9684 if (GET_CODE (operands[2]) == CONST_INT)
9686 split_di (operands, 2, low, high);
9687 count = INTVAL (operands[2]) & 63;
9689 if (count >= 32)
9691 emit_move_insn (low[0], high[1]);
9692 emit_move_insn (high[0], const0_rtx);
9694 if (count > 32)
9695 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9697 else
9699 if (!rtx_equal_p (operands[0], operands[1]))
9700 emit_move_insn (operands[0], operands[1]);
9701 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9702 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9705 else
9707 if (!rtx_equal_p (operands[0], operands[1]))
9708 emit_move_insn (operands[0], operands[1]);
9710 split_di (operands, 1, low, high);
9712 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9713 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9715 /* Heh. By reversing the arguments, we can reuse this pattern. */
9716 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9718 if (! no_new_pseudos)
9719 scratch = force_reg (SImode, const0_rtx);
9720 else
9721 emit_move_insn (scratch, const0_rtx);
9723 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9724 scratch));
9726 else
9727 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9731 /* Helper function for the string operations below. Test whether
9732 (VARIABLE & VALUE) is zero (e.g. VARIABLE is suitably aligned); if so, jump to the returned label. */
9733 static rtx
9734 ix86_expand_aligntest (variable, value)
9735 rtx variable;
9736 int value;
9738 rtx label = gen_label_rtx ();
9739 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9740 if (GET_MODE (variable) == DImode)
9741 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9742 else
9743 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9744 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9745 1, label);
9746 return label;
9749 /* Decrement COUNTREG by VALUE. */
9750 static void
9751 ix86_adjust_counter (countreg, value)
9752 rtx countreg;
9753 HOST_WIDE_INT value;
9755 if (GET_MODE (countreg) == DImode)
9756 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9757 else
9758 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9761 /* Zero extend the possibly-SImode EXP into a Pmode register. */
9763 ix86_zero_extend_to_Pmode (exp)
9764 rtx exp;
9766 rtx r;
9767 if (GET_MODE (exp) == VOIDmode)
9768 return force_reg (Pmode, exp);
9769 if (GET_MODE (exp) == Pmode)
9770 return copy_to_mode_reg (Pmode, exp);
9771 r = gen_reg_rtx (Pmode);
9772 emit_insn (gen_zero_extendsidi2 (r, exp));
9773 return r;
9776 /* Expand string move (memcpy) operation. Use i386 string operations when
9777 profitable. expand_clrstr contains similar code. */
9779 ix86_expand_movstr (dst, src, count_exp, align_exp)
9780 rtx dst, src, count_exp, align_exp;
9782 rtx srcreg, destreg, countreg;
9783 enum machine_mode counter_mode;
9784 HOST_WIDE_INT align = 0;
9785 unsigned HOST_WIDE_INT count = 0;
9786 rtx insns;
9788 start_sequence ();
9790 if (GET_CODE (align_exp) == CONST_INT)
9791 align = INTVAL (align_exp);
9793 /* This simple hack avoids all inlining code and simplifies code below. */
9794 if (!TARGET_ALIGN_STRINGOPS)
9795 align = 64;
9797 if (GET_CODE (count_exp) == CONST_INT)
9798 count = INTVAL (count_exp);
9800 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9801 for 64 bits use SImode when possible, otherwise DImode.
9802 Set count to the number of bytes copied when known at compile time. */
9803 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9804 || x86_64_zero_extended_value (count_exp))
9805 counter_mode = SImode;
9806 else
9807 counter_mode = DImode;
9809 if (counter_mode != SImode && counter_mode != DImode)
9810 abort ();
9812 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9813 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9815 emit_insn (gen_cld ());
9817 /* When optimizing for size, emit a simple rep ; movsb instruction for
9818 counts not divisible by 4. */
9820 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9822 countreg = ix86_zero_extend_to_Pmode (count_exp);
9823 if (TARGET_64BIT)
9824 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9825 destreg, srcreg, countreg));
9826 else
9827 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9828 destreg, srcreg, countreg));
9831 /* For constant aligned (or small unaligned) copies use rep movsl
9832 followed by code copying the rest. For PentiumPro ensure 8 byte
9833 alignment to allow rep movsl acceleration. */
9835 else if (count != 0
9836 && (align >= 8
9837 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9838 || optimize_size || count < (unsigned int) 64))
9840 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
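/* The rep movs below copies COUNT >> 2 (or COUNT >> 3 when 8-byte chunks are
   used) full chunks; the leftover low-order bytes are finished off by the
   strmov insns that follow.  */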
9841 if (count & ~(size - 1))
9843 countreg = copy_to_mode_reg (counter_mode,
9844 GEN_INT ((count >> (size == 4 ? 2 : 3))
9845 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9846 countreg = ix86_zero_extend_to_Pmode (countreg);
9847 if (size == 4)
9849 if (TARGET_64BIT)
9850 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9851 destreg, srcreg, countreg));
9852 else
9853 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9854 destreg, srcreg, countreg));
9856 else
9857 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9858 destreg, srcreg, countreg));
9860 if (size == 8 && (count & 0x04))
9861 emit_insn (gen_strmovsi (destreg, srcreg));
9862 if (count & 0x02)
9863 emit_insn (gen_strmovhi (destreg, srcreg));
9864 if (count & 0x01)
9865 emit_insn (gen_strmovqi (destreg, srcreg));
9867 /* The generic code based on the glibc implementation:
9868 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9869 allowing accelerated copying there)
9870 - copy the data using rep movsl
9871 - copy the rest. */
9872 else
9874 rtx countreg2;
9875 rtx label = NULL;
9876 int desired_alignment = (TARGET_PENTIUMPRO
9877 && (count == 0 || count >= (unsigned int) 260)
9878 ? 8 : UNITS_PER_WORD);
9880 /* In case we don't know anything about the alignment, default to the
9881 library version, since it is usually equally fast and results in
9882 shorter code. */
9883 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9885 end_sequence ();
9886 return 0;
9889 if (TARGET_SINGLE_STRINGOP)
9890 emit_insn (gen_cld ());
9892 countreg2 = gen_reg_rtx (Pmode);
9893 countreg = copy_to_mode_reg (counter_mode, count_exp);
9895 /* We don't use loops to align the destination and to copy parts smaller
9896 than 4 bytes, because gcc is able to optimize such code better (in
9897 the case the destination or the count really is aligned, gcc is often
9898 able to predict the branches) and also it is friendlier to the
9899 hardware branch prediction.
9901 Using loops is beneficial for the generic case, because we can
9902 handle small counts using the loops. Many CPUs (such as Athlon)
9903 have large REP prefix setup costs.
9905 This is quite costly. Maybe we can revisit this decision later or
9906 add some customizability to this code. */
9908 if (count == 0 && align < desired_alignment)
9910 label = gen_label_rtx ();
9911 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9912 LEU, 0, counter_mode, 1, label);
9914 if (align <= 1)
9916 rtx label = ix86_expand_aligntest (destreg, 1);
9917 emit_insn (gen_strmovqi (destreg, srcreg));
9918 ix86_adjust_counter (countreg, 1);
9919 emit_label (label);
9920 LABEL_NUSES (label) = 1;
9922 if (align <= 2)
9924 rtx label = ix86_expand_aligntest (destreg, 2);
9925 emit_insn (gen_strmovhi (destreg, srcreg));
9926 ix86_adjust_counter (countreg, 2);
9927 emit_label (label);
9928 LABEL_NUSES (label) = 1;
9930 if (align <= 4 && desired_alignment > 4)
9932 rtx label = ix86_expand_aligntest (destreg, 4);
9933 emit_insn (gen_strmovsi (destreg, srcreg));
9934 ix86_adjust_counter (countreg, 4);
9935 emit_label (label);
9936 LABEL_NUSES (label) = 1;
9939 if (label && desired_alignment > 4 && !TARGET_64BIT)
9941 emit_label (label);
9942 LABEL_NUSES (label) = 1;
9943 label = NULL_RTX;
9945 if (!TARGET_SINGLE_STRINGOP)
9946 emit_insn (gen_cld ());
9947 if (TARGET_64BIT)
9949 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9950 GEN_INT (3)));
9951 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9952 destreg, srcreg, countreg2));
9954 else
9956 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9957 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9958 destreg, srcreg, countreg2));
9961 if (label)
9963 emit_label (label);
9964 LABEL_NUSES (label) = 1;
9966 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9967 emit_insn (gen_strmovsi (destreg, srcreg));
9968 if ((align <= 4 || count == 0) && TARGET_64BIT)
9970 rtx label = ix86_expand_aligntest (countreg, 4);
9971 emit_insn (gen_strmovsi (destreg, srcreg));
9972 emit_label (label);
9973 LABEL_NUSES (label) = 1;
9975 if (align > 2 && count != 0 && (count & 2))
9976 emit_insn (gen_strmovhi (destreg, srcreg));
9977 if (align <= 2 || count == 0)
9979 rtx label = ix86_expand_aligntest (countreg, 2);
9980 emit_insn (gen_strmovhi (destreg, srcreg));
9981 emit_label (label);
9982 LABEL_NUSES (label) = 1;
9984 if (align > 1 && count != 0 && (count & 1))
9985 emit_insn (gen_strmovqi (destreg, srcreg));
9986 if (align <= 1 || count == 0)
9988 rtx label = ix86_expand_aligntest (countreg, 1);
9989 emit_insn (gen_strmovqi (destreg, srcreg));
9990 emit_label (label);
9991 LABEL_NUSES (label) = 1;
9995 insns = get_insns ();
9996 end_sequence ();
9998 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9999 emit_insn (insns);
10000 return 1;
10003 /* Expand string clear operation (bzero). Use i386 string operations when
10004 profitable. expand_movstr contains similar code. */
10006 ix86_expand_clrstr (src, count_exp, align_exp)
10007 rtx src, count_exp, align_exp;
10009 rtx destreg, zeroreg, countreg;
10010 enum machine_mode counter_mode;
10011 HOST_WIDE_INT align = 0;
10012 unsigned HOST_WIDE_INT count = 0;
10014 if (GET_CODE (align_exp) == CONST_INT)
10015 align = INTVAL (align_exp);
10017 /* This simple hack avoids all inlining code and simplifies code below. */
10018 if (!TARGET_ALIGN_STRINGOPS)
10019 align = 32;
10021 if (GET_CODE (count_exp) == CONST_INT)
10022 count = INTVAL (count_exp);
10023 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
10024 for 64 bits use SImode when possible, otherwise DImode.
10025 Set count to the number of bytes to clear when known at compile time. */
10026 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10027 || x86_64_zero_extended_value (count_exp))
10028 counter_mode = SImode;
10029 else
10030 counter_mode = DImode;
10032 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10034 emit_insn (gen_cld ());
10036 /* When optimizing for size, emit a simple rep ; stosb instruction for
10037 counts not divisible by 4. */
10039 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10041 countreg = ix86_zero_extend_to_Pmode (count_exp);
10042 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10043 if (TARGET_64BIT)
10044 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10045 destreg, countreg));
10046 else
10047 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10048 destreg, countreg));
10050 else if (count != 0
10051 && (align >= 8
10052 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10053 || optimize_size || count < (unsigned int) 64))
10055 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10056 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10057 if (count & ~(size - 1))
10059 countreg = copy_to_mode_reg (counter_mode,
10060 GEN_INT ((count >> (size == 4 ? 2 : 3))
10061 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10062 countreg = ix86_zero_extend_to_Pmode (countreg);
10063 if (size == 4)
10065 if (TARGET_64BIT)
10066 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10067 destreg, countreg));
10068 else
10069 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10070 destreg, countreg));
10072 else
10073 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10074 destreg, countreg));
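/* The tail stores below reuse ZEROREG through narrower-mode subregs, so no
   additional zero constant needs to be loaded.  */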
10076 if (size == 8 && (count & 0x04))
10077 emit_insn (gen_strsetsi (destreg,
10078 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10079 if (count & 0x02)
10080 emit_insn (gen_strsethi (destreg,
10081 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10082 if (count & 0x01)
10083 emit_insn (gen_strsetqi (destreg,
10084 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10086 else
10088 rtx countreg2;
10089 rtx label = NULL;
10090 /* Compute desired alignment of the string operation. */
10091 int desired_alignment = (TARGET_PENTIUMPRO
10092 && (count == 0 || count >= (unsigned int) 260)
10093 ? 8 : UNITS_PER_WORD);
10095 /* In case we don't know anything about the alignment, default to the
10096 library version, since it is usually equally fast and results in
10097 shorter code. */
10098 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10099 return 0;
10101 if (TARGET_SINGLE_STRINGOP)
10102 emit_insn (gen_cld ());
10104 countreg2 = gen_reg_rtx (Pmode);
10105 countreg = copy_to_mode_reg (counter_mode, count_exp);
10106 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10108 if (count == 0 && align < desired_alignment)
10110 label = gen_label_rtx ();
10111 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10112 LEU, 0, counter_mode, 1, label);
10114 if (align <= 1)
10116 rtx label = ix86_expand_aligntest (destreg, 1);
10117 emit_insn (gen_strsetqi (destreg,
10118 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10119 ix86_adjust_counter (countreg, 1);
10120 emit_label (label);
10121 LABEL_NUSES (label) = 1;
10123 if (align <= 2)
10125 rtx label = ix86_expand_aligntest (destreg, 2);
10126 emit_insn (gen_strsethi (destreg,
10127 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10128 ix86_adjust_counter (countreg, 2);
10129 emit_label (label);
10130 LABEL_NUSES (label) = 1;
10132 if (align <= 4 && desired_alignment > 4)
10134 rtx label = ix86_expand_aligntest (destreg, 4);
10135 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10136 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10137 : zeroreg)));
10138 ix86_adjust_counter (countreg, 4);
10139 emit_label (label);
10140 LABEL_NUSES (label) = 1;
10143 if (label && desired_alignment > 4 && !TARGET_64BIT)
10145 emit_label (label);
10146 LABEL_NUSES (label) = 1;
10147 label = NULL_RTX;
10150 if (!TARGET_SINGLE_STRINGOP)
10151 emit_insn (gen_cld ());
10152 if (TARGET_64BIT)
10154 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10155 GEN_INT (3)));
10156 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10157 destreg, countreg2));
10159 else
10161 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10162 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10163 destreg, countreg2));
10165 if (label)
10167 emit_label (label);
10168 LABEL_NUSES (label) = 1;
10171 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10172 emit_insn (gen_strsetsi (destreg,
10173 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10174 if (TARGET_64BIT && (align <= 4 || count == 0))
10176 rtx label = ix86_expand_aligntest (countreg, 4);
10177 emit_insn (gen_strsetsi (destreg,
10178 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10179 emit_label (label);
10180 LABEL_NUSES (label) = 1;
10182 if (align > 2 && count != 0 && (count & 2))
10183 emit_insn (gen_strsethi (destreg,
10184 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10185 if (align <= 2 || count == 0)
10187 rtx label = ix86_expand_aligntest (countreg, 2);
10188 emit_insn (gen_strsethi (destreg,
10189 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10190 emit_label (label);
10191 LABEL_NUSES (label) = 1;
10193 if (align > 1 && count != 0 && (count & 1))
10194 emit_insn (gen_strsetqi (destreg,
10195 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10196 if (align <= 1 || count == 0)
10198 rtx label = ix86_expand_aligntest (countreg, 1);
10199 emit_insn (gen_strsetqi (destreg,
10200 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10201 emit_label (label);
10202 LABEL_NUSES (label) = 1;
10205 return 1;
10207 /* Expand strlen. */
10209 ix86_expand_strlen (out, src, eoschar, align)
10210 rtx out, src, eoschar, align;
10212 rtx addr, scratch1, scratch2, scratch3, scratch4;
10214 /* The generic case of the strlen expander is long.  Avoid
10215 expanding it unless TARGET_INLINE_ALL_STRINGOPS.  */
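/* Two inline expansions are available: the unrolled word-at-a-time scan
   emitted by ix86_expand_strlensi_unroll_1 below, and the simple
   "repnz scasb" pattern from i386.md.  The tests below pick between them
   (or punt to the library) based on optimization level, alignment and the
   end-of-string character.  */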
10217 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10218 && !TARGET_INLINE_ALL_STRINGOPS
10219 && !optimize_size
10220 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10221 return 0;
10223 addr = force_reg (Pmode, XEXP (src, 0));
10224 scratch1 = gen_reg_rtx (Pmode);
10226 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10227 && !optimize_size)
10229 /* It seems that some optimizers do not combine a call like
10230 foo (strlen (bar), strlen (bar));
10231 when the move and the subtraction are done here.  The length is
10232 calculated just once when these instructions are emitted inside
10233 output_strlen_unroll().  But since &bar[strlen (bar)] is often
10234 used, and this uses one fewer register for the lifetime of
10235 output_strlen_unroll(), this is better.  */
10237 emit_move_insn (out, addr);
10239 ix86_expand_strlensi_unroll_1 (out, align);
10241 /* strlensi_unroll_1 returns the address of the zero at the end of
10242 the string, like memchr(), so compute the length by subtracting
10243 the start address. */
10244 if (TARGET_64BIT)
10245 emit_insn (gen_subdi3 (out, out, addr));
10246 else
10247 emit_insn (gen_subsi3 (out, out, addr));
10249 else
10251 scratch2 = gen_reg_rtx (Pmode);
10252 scratch3 = gen_reg_rtx (Pmode);
10253 scratch4 = force_reg (Pmode, constm1_rtx);
10255 emit_move_insn (scratch3, addr);
10256 eoschar = force_reg (QImode, eoschar);
10258 emit_insn (gen_cld ());
10259 if (TARGET_64BIT)
10261 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10262 align, scratch4, scratch3));
10263 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10264 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10266 else
10268 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10269 align, scratch4, scratch3));
10270 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10271 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10274 return 1;
10277 /* Expand the appropriate insns for doing strlen if not just doing
10278 repnz; scasb
10280 out = result, initialized with the start address
10281 align_rtx = alignment of the address.
10282 scratch = scratch register, initialized with the start address when
10283 not aligned, otherwise undefined
10285 This is just the body. It needs the initialisations mentioned above and
10286 some address computing at the end. These things are done in i386.md. */
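/* Roughly, the emitted code first checks up to three leading bytes one at
   a time (looking for the terminator) until OUT is 4-byte aligned, then
   scans one SImode word per iteration using the zero-byte test below, and
   finally locates the exact terminating byte within the last word without
   taking a branch.  */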
10288 static void
10289 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10290 rtx out, align_rtx;
10292 int align;
10293 rtx tmp;
10294 rtx align_2_label = NULL_RTX;
10295 rtx align_3_label = NULL_RTX;
10296 rtx align_4_label = gen_label_rtx ();
10297 rtx end_0_label = gen_label_rtx ();
10298 rtx mem;
10299 rtx tmpreg = gen_reg_rtx (SImode);
10300 rtx scratch = gen_reg_rtx (SImode);
10302 align = 0;
10303 if (GET_CODE (align_rtx) == CONST_INT)
10304 align = INTVAL (align_rtx);
10306 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10308 /* Is there a known alignment and is it less than 4? */
10309 if (align < 4)
10311 rtx scratch1 = gen_reg_rtx (Pmode);
10312 emit_move_insn (scratch1, out);
10313 /* Is there a known alignment and is it not 2? */
10314 if (align != 2)
10316 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10317 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10319 /* Leave just the 3 lower bits. */
10320 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10321 NULL_RTX, 0, OPTAB_WIDEN);
10323 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10324 Pmode, 1, align_4_label);
10325 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10326 Pmode, 1, align_2_label);
10327 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10328 Pmode, 1, align_3_label);
10330 else
10332 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10333 check whether it is aligned to a 4-byte boundary.  */
10335 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10336 NULL_RTX, 0, OPTAB_WIDEN);
10338 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10339 Pmode, 1, align_4_label);
10342 mem = gen_rtx_MEM (QImode, out);
10344 /* Now compare the bytes. */
10346 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
10347 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10348 QImode, 1, end_0_label);
10350 /* Increment the address. */
10351 if (TARGET_64BIT)
10352 emit_insn (gen_adddi3 (out, out, const1_rtx));
10353 else
10354 emit_insn (gen_addsi3 (out, out, const1_rtx));
10356 /* Not needed with an alignment of 2 */
10357 if (align != 2)
10359 emit_label (align_2_label);
10361 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10362 end_0_label);
10364 if (TARGET_64BIT)
10365 emit_insn (gen_adddi3 (out, out, const1_rtx));
10366 else
10367 emit_insn (gen_addsi3 (out, out, const1_rtx));
10369 emit_label (align_3_label);
10372 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10373 end_0_label);
10375 if (TARGET_64BIT)
10376 emit_insn (gen_adddi3 (out, out, const1_rtx));
10377 else
10378 emit_insn (gen_addsi3 (out, out, const1_rtx));
10381 /* Generate a loop to check 4 bytes at a time.  Aligning this loop is
10382 not worthwhile: it only makes the program larger and does not
10383 speed it up.  */
10384 emit_label (align_4_label);
10386 mem = gen_rtx_MEM (SImode, out);
10387 emit_move_insn (scratch, mem);
10388 if (TARGET_64BIT)
10389 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10390 else
10391 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10393 /* This formula yields a nonzero result iff one of the bytes is zero.
10394 This saves three branches inside the loop and many cycles.  */
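/* In C terms the test is ((x - 0x01010101) & ~x & 0x80808080) != 0.
   Worked example: x = 0x44004242 contains a zero byte;
   x - 0x01010101 = 0x42ff4141, ~x = 0xbbffbdbd, their AND is 0x02ff0101,
   and masking with 0x80808080 leaves 0x00800000, which is nonzero.
   For x = 0x64636261 ("abcd", no zero byte) the same computation gives
   0x03000100 & 0x80808080 = 0, so the loop continues.  */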
10396 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10397 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10398 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10399 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10400 gen_int_mode (0x80808080, SImode)));
10401 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10402 align_4_label);
10404 if (TARGET_CMOVE)
10406 rtx reg = gen_reg_rtx (SImode);
10407 rtx reg2 = gen_reg_rtx (Pmode);
10408 emit_move_insn (reg, tmpreg);
10409 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10411 /* If zero is not in the first two bytes, move two bytes forward. */
10412 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10413 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10414 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10415 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10416 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10417 reg,
10418 tmpreg)));
10419 /* Emit lea manually to avoid clobbering of flags. */
10420 emit_insn (gen_rtx_SET (SImode, reg2,
10421 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10423 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10424 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10425 emit_insn (gen_rtx_SET (VOIDmode, out,
10426 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10427 reg2,
10428 out)));
10431 else
10433 rtx end_2_label = gen_label_rtx ();
10434 /* Is zero in the first two bytes? */
10436 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10437 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10438 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10439 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10440 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10441 pc_rtx);
10442 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10443 JUMP_LABEL (tmp) = end_2_label;
10445 /* Not in the first two. Move two bytes forward. */
10446 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10447 if (TARGET_64BIT)
10448 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10449 else
10450 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10452 emit_label (end_2_label);
10456 /* Avoid branch in fixing the byte. */
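/* At this point OUT is 4 bytes past the start of the two-byte group that
   contains the terminator, and bit 7 of the low byte of TMPREG is set iff
   the first byte of that group is the zero byte.  Adding the low byte to
   itself copies that bit into the carry flag, so the sbb below computes
   OUT = OUT - 3 - CF, which leaves OUT pointing exactly at the
   terminating zero in either case.  */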
10457 tmpreg = gen_lowpart (QImode, tmpreg);
10458 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10459 if (TARGET_64BIT)
10460 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10461 else
10462 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10464 emit_label (end_0_label);
10467 void
10468 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10469 rtx retval, fnaddr, callarg1, callarg2, pop;
10471 rtx use = NULL, call;
10473 if (pop == const0_rtx)
10474 pop = NULL;
10475 if (TARGET_64BIT && pop)
10476 abort ();
10478 /* Static functions and indirect calls don't need the pic register. */
10479 if (! TARGET_64BIT && flag_pic
10480 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10481 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10482 use_reg (&use, pic_offset_table_rtx);
10484 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10486 rtx al = gen_rtx_REG (QImode, 0);
10487 emit_move_insn (al, callarg2);
10488 use_reg (&use, al);
10491 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10493 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10494 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10497 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10498 if (retval)
10499 call = gen_rtx_SET (VOIDmode, retval, call);
10500 if (pop)
10502 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10503 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10504 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10507 call = emit_call_insn (call);
10508 if (use)
10509 CALL_INSN_FUNCTION_USAGE (call) = use;
10513 /* Clear stack slot assignments remembered from previous functions.
10514 This is called from INIT_EXPANDERS once before RTL is emitted for each
10515 function. */
10517 static struct machine_function *
10518 ix86_init_machine_status ()
10520 return ggc_alloc_cleared (sizeof (struct machine_function));
10523 /* Return a MEM corresponding to a stack slot with mode MODE.
10524 Allocate a new slot if necessary.
10526 The RTL for a function can have several slots available: N is
10527 which slot to use. */
10530 assign_386_stack_local (mode, n)
10531 enum machine_mode mode;
10532 int n;
10534 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10535 abort ();
10537 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10538 ix86_stack_locals[(int) mode][n]
10539 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10541 return ix86_stack_locals[(int) mode][n];
10544 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10546 static GTY(()) rtx ix86_tls_symbol;
10548 ix86_tls_get_addr ()
10551 if (!ix86_tls_symbol)
10553 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10554 ? "___tls_get_addr"
10555 : "__tls_get_addr"));
10558 return ix86_tls_symbol;
10561 /* Calculate the length of the memory address in the instruction
10562 encoding. Does not include the one-byte modrm, opcode, or prefix. */
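/* For example, a register-indirect address such as (%eax) adds 0 bytes,
   (%esp) adds 1 (it needs the two-byte form), a bare 32-bit absolute
   address adds 4, and 8(%eax,%ebx,4) adds 2 (one SIB byte plus an 8-bit
   displacement).  */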
10564 static int
10565 memory_address_length (addr)
10566 rtx addr;
10568 struct ix86_address parts;
10569 rtx base, index, disp;
10570 int len;
10572 if (GET_CODE (addr) == PRE_DEC
10573 || GET_CODE (addr) == POST_INC
10574 || GET_CODE (addr) == PRE_MODIFY
10575 || GET_CODE (addr) == POST_MODIFY)
10576 return 0;
10578 if (! ix86_decompose_address (addr, &parts))
10579 abort ();
10581 base = parts.base;
10582 index = parts.index;
10583 disp = parts.disp;
10584 len = 0;
10586 /* Register Indirect. */
10587 if (base && !index && !disp)
10589 /* Special cases: ebp and esp need the two-byte modrm form. */
10590 if (addr == stack_pointer_rtx
10591 || addr == arg_pointer_rtx
10592 || addr == frame_pointer_rtx
10593 || addr == hard_frame_pointer_rtx)
10594 len = 1;
10597 /* Direct Addressing. */
10598 else if (disp && !base && !index)
10599 len = 4;
10601 else
10603 /* Find the length of the displacement constant. */
10604 if (disp)
10606 if (GET_CODE (disp) == CONST_INT
10607 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10608 len = 1;
10609 else
10610 len = 4;
10613 /* An index requires the two-byte modrm form. */
10614 if (index)
10615 len += 1;
10618 return len;
10621 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
10622 is set, expect that the insn has an 8-bit immediate alternative.  */
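/* For example, "addl $400, %eax" carries a 4-byte immediate, whereas
   "addl $4, %eax" can use the sign-extended 8-bit form, so an insn with
   SHORTFORM set reports a length of 1 for such small constants.  */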
10624 ix86_attr_length_immediate_default (insn, shortform)
10625 rtx insn;
10626 int shortform;
10628 int len = 0;
10629 int i;
10630 extract_insn_cached (insn);
10631 for (i = recog_data.n_operands - 1; i >= 0; --i)
10632 if (CONSTANT_P (recog_data.operand[i]))
10634 if (len)
10635 abort ();
10636 if (shortform
10637 && GET_CODE (recog_data.operand[i]) == CONST_INT
10638 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10639 len = 1;
10640 else
10642 switch (get_attr_mode (insn))
10644 case MODE_QI:
10645 len+=1;
10646 break;
10647 case MODE_HI:
10648 len+=2;
10649 break;
10650 case MODE_SI:
10651 len+=4;
10652 break;
10653 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10654 case MODE_DI:
10655 len+=4;
10656 break;
10657 default:
10658 fatal_insn ("unknown insn mode", insn);
10662 return len;
10664 /* Compute default value for "length_address" attribute. */
10666 ix86_attr_length_address_default (insn)
10667 rtx insn;
10669 int i;
10670 extract_insn_cached (insn);
10671 for (i = recog_data.n_operands - 1; i >= 0; --i)
10672 if (GET_CODE (recog_data.operand[i]) == MEM)
10674 return memory_address_length (XEXP (recog_data.operand[i], 0));
10675 break;
10677 return 0;
10680 /* Return the maximum number of instructions a cpu can issue. */
10682 static int
10683 ix86_issue_rate ()
10685 switch (ix86_cpu)
10687 case PROCESSOR_PENTIUM:
10688 case PROCESSOR_K6:
10689 return 2;
10691 case PROCESSOR_PENTIUMPRO:
10692 case PROCESSOR_PENTIUM4:
10693 case PROCESSOR_ATHLON:
10694 return 3;
10696 default:
10697 return 1;
10701 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10702 by DEP_INSN and nothing else set by DEP_INSN.  */
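/* For example, on Pentium a "cmp" followed by a dependent conditional
   jump or setcc pairs; ix86_adjust_cost uses this predicate to give that
   flags dependency a cost of 0.  */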
10704 static int
10705 ix86_flags_dependant (insn, dep_insn, insn_type)
10706 rtx insn, dep_insn;
10707 enum attr_type insn_type;
10709 rtx set, set2;
10711 /* Simplify the test for uninteresting insns. */
10712 if (insn_type != TYPE_SETCC
10713 && insn_type != TYPE_ICMOV
10714 && insn_type != TYPE_FCMOV
10715 && insn_type != TYPE_IBR)
10716 return 0;
10718 if ((set = single_set (dep_insn)) != 0)
10720 set = SET_DEST (set);
10721 set2 = NULL_RTX;
10723 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10724 && XVECLEN (PATTERN (dep_insn), 0) == 2
10725 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10726 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10728 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10729 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10731 else
10732 return 0;
10734 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10735 return 0;
10737 /* This test is true if the dependent insn reads the flags but
10738 not any other potentially set register. */
10739 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10740 return 0;
10742 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10743 return 0;
10745 return 1;
10748 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10749 address with operands set by DEP_INSN. */
10751 static int
10752 ix86_agi_dependant (insn, dep_insn, insn_type)
10753 rtx insn, dep_insn;
10754 enum attr_type insn_type;
10756 rtx addr;
10758 if (insn_type == TYPE_LEA
10759 && TARGET_PENTIUM)
10761 addr = PATTERN (insn);
10762 if (GET_CODE (addr) == SET)
10764 else if (GET_CODE (addr) == PARALLEL
10765 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10766 addr = XVECEXP (addr, 0, 0);
10767 else
10768 abort ();
10769 addr = SET_SRC (addr);
10771 else
10773 int i;
10774 extract_insn_cached (insn);
10775 for (i = recog_data.n_operands - 1; i >= 0; --i)
10776 if (GET_CODE (recog_data.operand[i]) == MEM)
10778 addr = XEXP (recog_data.operand[i], 0);
10779 goto found;
10781 return 0;
10782 found:;
10785 return modified_in_p (addr, dep_insn);
10788 static int
10789 ix86_adjust_cost (insn, link, dep_insn, cost)
10790 rtx insn, link, dep_insn;
10791 int cost;
10793 enum attr_type insn_type, dep_insn_type;
10794 enum attr_memory memory, dep_memory;
10795 rtx set, set2;
10796 int dep_insn_code_number;
10798 /* Anti and output dependencies have zero cost on all CPUs. */
10799 if (REG_NOTE_KIND (link) != 0)
10800 return 0;
10802 dep_insn_code_number = recog_memoized (dep_insn);
10804 /* If we can't recognize the insns, we can't really do anything. */
10805 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10806 return cost;
10808 insn_type = get_attr_type (insn);
10809 dep_insn_type = get_attr_type (dep_insn);
10811 switch (ix86_cpu)
10813 case PROCESSOR_PENTIUM:
10814 /* Address Generation Interlock adds a cycle of latency. */
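/* E.g. "addl $4, %ebx" immediately followed by "movl (%ebx), %eax"
   stalls for one cycle, because the load's address is produced by the
   instruction directly in front of it.  */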
10815 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10816 cost += 1;
10818 /* ??? Compares pair with jump/setcc. */
10819 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10820 cost = 0;
10822 /* Floating point stores require the value to be ready one cycle earlier. */
10823 if (insn_type == TYPE_FMOV
10824 && get_attr_memory (insn) == MEMORY_STORE
10825 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10826 cost += 1;
10827 break;
10829 case PROCESSOR_PENTIUMPRO:
10830 memory = get_attr_memory (insn);
10831 dep_memory = get_attr_memory (dep_insn);
10833 /* Since we can't represent delayed latencies of load+operation,
10834 increase the cost here for non-imov insns. */
10835 if (dep_insn_type != TYPE_IMOV
10836 && dep_insn_type != TYPE_FMOV
10837 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10838 cost += 1;
10840 /* INT->FP conversion is expensive. */
10841 if (get_attr_fp_int_src (dep_insn))
10842 cost += 5;
10844 /* There is one cycle extra latency between an FP op and a store. */
10845 if (insn_type == TYPE_FMOV
10846 && (set = single_set (dep_insn)) != NULL_RTX
10847 && (set2 = single_set (insn)) != NULL_RTX
10848 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10849 && GET_CODE (SET_DEST (set2)) == MEM)
10850 cost += 1;
10852 /* Show ability of reorder buffer to hide latency of load by executing
10853 in parallel with previous instruction in case
10854 previous instruction is not needed to compute the address. */
10855 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10856 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10858 /* Claim moves to take one cycle, as the core can issue one load
10859 at a time and the next load can start a cycle later.  */
10860 if (dep_insn_type == TYPE_IMOV
10861 || dep_insn_type == TYPE_FMOV)
10862 cost = 1;
10863 else if (cost > 1)
10864 cost--;
10866 break;
10868 case PROCESSOR_K6:
10869 memory = get_attr_memory (insn);
10870 dep_memory = get_attr_memory (dep_insn);
10871 /* The esp dependency is resolved before the instruction is really
10872 finished. */
10873 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10874 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10875 return 1;
10877 /* Since we can't represent delayed latencies of load+operation,
10878 increase the cost here for non-imov insns. */
10879 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10880 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10882 /* INT->FP conversion is expensive. */
10883 if (get_attr_fp_int_src (dep_insn))
10884 cost += 5;
10886 /* Show ability of reorder buffer to hide latency of load by executing
10887 in parallel with previous instruction in case
10888 previous instruction is not needed to compute the address. */
10889 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10890 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10892 /* Claim moves to take one cycle, as the core can issue one load
10893 at a time and the next load can start a cycle later.  */
10894 if (dep_insn_type == TYPE_IMOV
10895 || dep_insn_type == TYPE_FMOV)
10896 cost = 1;
10897 else if (cost > 2)
10898 cost -= 2;
10899 else
10900 cost = 1;
10902 break;
10904 case PROCESSOR_ATHLON:
10905 memory = get_attr_memory (insn);
10906 dep_memory = get_attr_memory (dep_insn);
10908 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10910 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10911 cost += 2;
10912 else
10913 cost += 3;
10915 /* Show ability of reorder buffer to hide latency of load by executing
10916 in parallel with previous instruction in case
10917 previous instruction is not needed to compute the address. */
10918 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10919 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10921 /* Claim moves to take one cycle, as the core can issue one load
10922 at a time and the next load can start a cycle later.  */
10923 if (dep_insn_type == TYPE_IMOV
10924 || dep_insn_type == TYPE_FMOV)
10925 cost = 0;
10926 else if (cost >= 3)
10927 cost -= 3;
10928 else
10929 cost = 0;
10932 default:
10933 break;
10936 return cost;
10939 static union
10941 struct ppro_sched_data
10943 rtx decode[3];
10944 int issued_this_cycle;
10945 } ppro;
10946 } ix86_sched_data;
10948 static enum attr_ppro_uops
10949 ix86_safe_ppro_uops (insn)
10950 rtx insn;
10952 if (recog_memoized (insn) >= 0)
10953 return get_attr_ppro_uops (insn);
10954 else
10955 return PPRO_UOPS_MANY;
10958 static void
10959 ix86_dump_ppro_packet (dump)
10960 FILE *dump;
10962 if (ix86_sched_data.ppro.decode[0])
10964 fprintf (dump, "PPRO packet: %d",
10965 INSN_UID (ix86_sched_data.ppro.decode[0]));
10966 if (ix86_sched_data.ppro.decode[1])
10967 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10968 if (ix86_sched_data.ppro.decode[2])
10969 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10970 fputc ('\n', dump);
10974 /* We're beginning a new block. Initialize data structures as necessary. */
10976 static void
10977 ix86_sched_init (dump, sched_verbose, veclen)
10978 FILE *dump ATTRIBUTE_UNUSED;
10979 int sched_verbose ATTRIBUTE_UNUSED;
10980 int veclen ATTRIBUTE_UNUSED;
10982 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10985 /* Shift INSN to SLOT, and shift everything else down. */
10987 static void
10988 ix86_reorder_insn (insnp, slot)
10989 rtx *insnp, *slot;
10991 if (insnp != slot)
10993 rtx insn = *insnp;
10994 do
10995 insnp[0] = insnp[1];
10996 while (++insnp != slot);
10997 *insnp = insn;
11001 static void
11002 ix86_sched_reorder_ppro (ready, e_ready)
11003 rtx *ready;
11004 rtx *e_ready;
11006 rtx decode[3];
11007 enum attr_ppro_uops cur_uops;
11008 int issued_this_cycle;
11009 rtx *insnp;
11010 int i;
11012 /* At this point .ppro.decode contains the state of the three
11013 decoders from last "cycle". That is, those insns that were
11014 actually independent. But here we're scheduling for the
11015 decoder, and we may find things that are decodable in the
11016 same cycle. */
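/* The PPro/PII/PIII front end decodes up to three insns per cycle:
   decoder 0 accepts insns of up to four uops, while decoders 1 and 2
   only accept single-uop insns (the "4-1-1" template); anything longer
   goes through the microcode sequencer on its own.  The slot filling
   below mirrors that arrangement.  */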
11018 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11019 issued_this_cycle = 0;
11021 insnp = e_ready;
11022 cur_uops = ix86_safe_ppro_uops (*insnp);
11024 /* If the decoders are empty, and we've a complex insn at the
11025 head of the priority queue, let it issue without complaint. */
11026 if (decode[0] == NULL)
11028 if (cur_uops == PPRO_UOPS_MANY)
11030 decode[0] = *insnp;
11031 goto ppro_done;
11034 /* Otherwise, search for a 2-4 uop insn to issue. */
11035 while (cur_uops != PPRO_UOPS_FEW)
11037 if (insnp == ready)
11038 break;
11039 cur_uops = ix86_safe_ppro_uops (*--insnp);
11042 /* If so, move it to the head of the line. */
11043 if (cur_uops == PPRO_UOPS_FEW)
11044 ix86_reorder_insn (insnp, e_ready);
11046 /* Issue the head of the queue. */
11047 issued_this_cycle = 1;
11048 decode[0] = *e_ready--;
11051 /* Look for simple insns to fill in the other two slots. */
11052 for (i = 1; i < 3; ++i)
11053 if (decode[i] == NULL)
11055 if (ready > e_ready)
11056 goto ppro_done;
11058 insnp = e_ready;
11059 cur_uops = ix86_safe_ppro_uops (*insnp);
11060 while (cur_uops != PPRO_UOPS_ONE)
11062 if (insnp == ready)
11063 break;
11064 cur_uops = ix86_safe_ppro_uops (*--insnp);
11067 /* Found one. Move it to the head of the queue and issue it. */
11068 if (cur_uops == PPRO_UOPS_ONE)
11070 ix86_reorder_insn (insnp, e_ready);
11071 decode[i] = *e_ready--;
11072 issued_this_cycle++;
11073 continue;
11076 /* ??? Didn't find one. Ideally, here we would do a lazy split
11077 of 2-uop insns, issue one and queue the other. */
11080 ppro_done:
11081 if (issued_this_cycle == 0)
11082 issued_this_cycle = 1;
11083 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11086 /* We are about to begin issuing insns for this clock cycle.
11087 Override the default sort algorithm to better slot instructions. */
11088 static int
11089 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11090 FILE *dump ATTRIBUTE_UNUSED;
11091 int sched_verbose ATTRIBUTE_UNUSED;
11092 rtx *ready;
11093 int *n_readyp;
11094 int clock_var ATTRIBUTE_UNUSED;
11096 int n_ready = *n_readyp;
11097 rtx *e_ready = ready + n_ready - 1;
11099 /* Make sure to go ahead and initialize key items in
11100 ix86_sched_data if we are not going to bother trying to
11101 reorder the ready queue. */
11102 if (n_ready < 2)
11104 ix86_sched_data.ppro.issued_this_cycle = 1;
11105 goto out;
11108 switch (ix86_cpu)
11110 default:
11111 break;
11113 case PROCESSOR_PENTIUMPRO:
11114 ix86_sched_reorder_ppro (ready, e_ready);
11115 break;
11118 out:
11119 return ix86_issue_rate ();
11122 /* We are about to issue INSN. Return the number of insns left on the
11123 ready queue that can be issued this cycle. */
11125 static int
11126 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11127 FILE *dump;
11128 int sched_verbose;
11129 rtx insn;
11130 int can_issue_more;
11132 int i;
11133 switch (ix86_cpu)
11135 default:
11136 return can_issue_more - 1;
11138 case PROCESSOR_PENTIUMPRO:
11140 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11142 if (uops == PPRO_UOPS_MANY)
11144 if (sched_verbose)
11145 ix86_dump_ppro_packet (dump);
11146 ix86_sched_data.ppro.decode[0] = insn;
11147 ix86_sched_data.ppro.decode[1] = NULL;
11148 ix86_sched_data.ppro.decode[2] = NULL;
11149 if (sched_verbose)
11150 ix86_dump_ppro_packet (dump);
11151 ix86_sched_data.ppro.decode[0] = NULL;
11153 else if (uops == PPRO_UOPS_FEW)
11155 if (sched_verbose)
11156 ix86_dump_ppro_packet (dump);
11157 ix86_sched_data.ppro.decode[0] = insn;
11158 ix86_sched_data.ppro.decode[1] = NULL;
11159 ix86_sched_data.ppro.decode[2] = NULL;
11161 else
11163 for (i = 0; i < 3; ++i)
11164 if (ix86_sched_data.ppro.decode[i] == NULL)
11166 ix86_sched_data.ppro.decode[i] = insn;
11167 break;
11169 if (i == 3)
11170 abort ();
11171 if (i == 2)
11173 if (sched_verbose)
11174 ix86_dump_ppro_packet (dump);
11175 ix86_sched_data.ppro.decode[0] = NULL;
11176 ix86_sched_data.ppro.decode[1] = NULL;
11177 ix86_sched_data.ppro.decode[2] = NULL;
11181 return --ix86_sched_data.ppro.issued_this_cycle;
11185 static int
11186 ia32_use_dfa_pipeline_interface ()
11188 if (ix86_cpu == PROCESSOR_PENTIUM)
11189 return 1;
11190 return 0;
11193 /* How many alternative schedules to try. This should be as wide as the
11194 scheduling freedom in the DFA, but no wider. Making this value too
11195 large results in extra work for the scheduler. */
11197 static int
11198 ia32_multipass_dfa_lookahead ()
11200 if (ix86_cpu == PROCESSOR_PENTIUM)
11201 return 2;
11202 else
11203 return 0;
11207 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11208 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11209 appropriate. */
11211 void
11212 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11213 rtx insns;
11214 rtx dstref, srcref, dstreg, srcreg;
11216 rtx insn;
11218 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11219 if (INSN_P (insn))
11220 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11221 dstreg, srcreg);
11224 /* Subroutine of above to actually do the updating by recursively walking
11225 the rtx. */
11227 static void
11228 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11229 rtx x;
11230 rtx dstref, srcref, dstreg, srcreg;
11232 enum rtx_code code = GET_CODE (x);
11233 const char *format_ptr = GET_RTX_FORMAT (code);
11234 int i, j;
11236 if (code == MEM && XEXP (x, 0) == dstreg)
11237 MEM_COPY_ATTRIBUTES (x, dstref);
11238 else if (code == MEM && XEXP (x, 0) == srcreg)
11239 MEM_COPY_ATTRIBUTES (x, srcref);
11241 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11243 if (*format_ptr == 'e')
11244 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11245 dstreg, srcreg);
11246 else if (*format_ptr == 'E')
11247 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11248 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11249 dstreg, srcreg);
11253 /* Compute the alignment given to a constant that is being placed in memory.
11254 EXP is the constant and ALIGN is the alignment that the object would
11255 ordinarily have.
11256 The value of this function is used instead of that alignment to align
11257 the object. */
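/* For example, a DFmode (double) constant that would ordinarily get only
   32-bit alignment is promoted to 64 bits so it can be accessed with a
   naturally aligned load, and string constants with TREE_STRING_LENGTH of
   31 or more are aligned to 256 bits, presumably to help the
   word-at-a-time string code and cache behavior.  */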
11260 ix86_constant_alignment (exp, align)
11261 tree exp;
11262 int align;
11264 if (TREE_CODE (exp) == REAL_CST)
11266 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11267 return 64;
11268 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11269 return 128;
11271 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11272 && align < 256)
11273 return 256;
11275 return align;
11278 /* Compute the alignment for a static variable.
11279 TYPE is the data type, and ALIGN is the alignment that
11280 the object would ordinarily have. The value of this function is used
11281 instead of that alignment to align the object. */
11284 ix86_data_alignment (type, align)
11285 tree type;
11286 int align;
11288 if (AGGREGATE_TYPE_P (type)
11289 && TYPE_SIZE (type)
11290 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11291 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11292 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11293 return 256;
11295 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11296 to a 16-byte boundary. */
11297 if (TARGET_64BIT)
11299 if (AGGREGATE_TYPE_P (type)
11300 && TYPE_SIZE (type)
11301 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11302 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11303 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11304 return 128;
11307 if (TREE_CODE (type) == ARRAY_TYPE)
11309 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11310 return 64;
11311 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11312 return 128;
11314 else if (TREE_CODE (type) == COMPLEX_TYPE)
11317 if (TYPE_MODE (type) == DCmode && align < 64)
11318 return 64;
11319 if (TYPE_MODE (type) == XCmode && align < 128)
11320 return 128;
11322 else if ((TREE_CODE (type) == RECORD_TYPE
11323 || TREE_CODE (type) == UNION_TYPE
11324 || TREE_CODE (type) == QUAL_UNION_TYPE)
11325 && TYPE_FIELDS (type))
11327 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11328 return 64;
11329 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11330 return 128;
11332 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11333 || TREE_CODE (type) == INTEGER_TYPE)
11335 if (TYPE_MODE (type) == DFmode && align < 64)
11336 return 64;
11337 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11338 return 128;
11341 return align;
11344 /* Compute the alignment for a local variable.
11345 TYPE is the data type, and ALIGN is the alignment that
11346 the object would ordinarily have. The value of this macro is used
11347 instead of that alignment to align the object. */
11350 ix86_local_alignment (type, align)
11351 tree type;
11352 int align;
11354 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11355 to a 16-byte boundary. */
11356 if (TARGET_64BIT)
11358 if (AGGREGATE_TYPE_P (type)
11359 && TYPE_SIZE (type)
11360 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11361 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11362 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11363 return 128;
11365 if (TREE_CODE (type) == ARRAY_TYPE)
11367 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11368 return 64;
11369 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11370 return 128;
11372 else if (TREE_CODE (type) == COMPLEX_TYPE)
11374 if (TYPE_MODE (type) == DCmode && align < 64)
11375 return 64;
11376 if (TYPE_MODE (type) == XCmode && align < 128)
11377 return 128;
11379 else if ((TREE_CODE (type) == RECORD_TYPE
11380 || TREE_CODE (type) == UNION_TYPE
11381 || TREE_CODE (type) == QUAL_UNION_TYPE)
11382 && TYPE_FIELDS (type))
11384 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11385 return 64;
11386 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11387 return 128;
11389 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11390 || TREE_CODE (type) == INTEGER_TYPE)
11393 if (TYPE_MODE (type) == DFmode && align < 64)
11394 return 64;
11395 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11396 return 128;
11398 return align;
11401 /* Emit RTL insns to initialize the variable parts of a trampoline.
11402 FNADDR is an RTX for the address of the function's pure code.
11403 CXT is an RTX for the static chain value for the function. */
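/* For !TARGET_64BIT the emitted trampoline is 10 bytes:
       b9 <cxt32>     movl  $CXT, %ecx
       e9 <disp32>    jmp   FNADDR
   where <disp32> is measured from the end of the jmp (tramp + 10).
   The 64-bit variant below loads FNADDR into %r11 and CXT into %r10
   with mov/movabs and finishes with "jmp *%r11".  */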
11404 void
11405 x86_initialize_trampoline (tramp, fnaddr, cxt)
11406 rtx tramp, fnaddr, cxt;
11408 if (!TARGET_64BIT)
11410 /* Compute offset from the end of the jmp to the target function. */
11411 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11412 plus_constant (tramp, 10),
11413 NULL_RTX, 1, OPTAB_DIRECT);
11414 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11415 gen_int_mode (0xb9, QImode));
11416 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11417 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11418 gen_int_mode (0xe9, QImode));
11419 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11421 else
11423 int offset = 0;
11424 /* Try to load address using shorter movl instead of movabs.
11425 We may want to support movq for kernel mode, but kernel does not use
11426 trampolines at the moment. */
11427 if (x86_64_zero_extended_value (fnaddr))
11429 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11430 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11431 gen_int_mode (0xbb41, HImode));
11432 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11433 gen_lowpart (SImode, fnaddr));
11434 offset += 6;
11436 else
11438 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11439 gen_int_mode (0xbb49, HImode));
11440 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11441 fnaddr);
11442 offset += 10;
11444 /* Load static chain using movabs to r10. */
11445 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11446 gen_int_mode (0xba49, HImode));
11447 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11448 cxt);
11449 offset += 10;
11450 /* Jump to r11. */
11451 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11452 gen_int_mode (0xff49, HImode));
11453 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11454 gen_int_mode (0xe3, QImode));
11455 offset += 3;
11456 if (offset > TRAMPOLINE_SIZE)
11457 abort ();
11461 #define def_builtin(MASK, NAME, TYPE, CODE) \
11462 do { \
11463 if ((MASK) & target_flags) \
11464 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
11465 } while (0)
11467 struct builtin_description
11469 const unsigned int mask;
11470 const enum insn_code icode;
11471 const char *const name;
11472 const enum ix86_builtins code;
11473 const enum rtx_code comparison;
11474 const unsigned int flag;
11477 /* Used for builtins that are enabled both by -msse and -msse2. */
11478 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
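/* Each entry in the tables below describes one builtin: the target_flags
   mask required for it to be defined, the insn pattern used to expand it,
   its user-visible name, its IX86_BUILTIN_* code, and, for the comparison
   builtins, the rtx comparison code together with a flag that asks the
   expander to swap the operands (e.g. comigt is expanded as comilt with
   swapped operands).  */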
11480 static const struct builtin_description bdesc_comi[] =
11482 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11483 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11484 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11485 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11486 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11487 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11488 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11489 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11490 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11491 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11492 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11493 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11494 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11495 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11496 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11497 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11498 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11499 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11500 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11501 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11502 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11503 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11504 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11505 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11508 static const struct builtin_description bdesc_2arg[] =
11510 /* SSE */
11511 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11512 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11513 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11514 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11515 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11516 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11517 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11518 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11520 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11521 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11522 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11523 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11524 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11525 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11526 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11527 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11528 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11529 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11530 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11531 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11532 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11533 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11534 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11535 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11536 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11537 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11538 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11539 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11540 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11541 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11542 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11543 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11545 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11546 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11547 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11548 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11550 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11551 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11552 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11553 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11554 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11556 /* MMX */
11557 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11558 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11559 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11560 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11561 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11562 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11564 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11565 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11566 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11567 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11568 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11569 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11570 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11571 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11573 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11574 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11575 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11577 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11578 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11579 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11580 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11582 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11583 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11585 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11586 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11587 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11588 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11589 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11590 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11592 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11593 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11594 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11595 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11597 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11598 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11599 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11600 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11601 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11602 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11604 /* Special. */
11605 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11606 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11607 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11609 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11610 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11612 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11613 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11614 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11615 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11616 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11617 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11619 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11620 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11621 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11622 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11623 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11624 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11626 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11627 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11628 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11629 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11631 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11632 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11634 /* SSE2 */
11635 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11636 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11637 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11638 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11639 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11640 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11641 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11642 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11644 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11645 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11646 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11647 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11648 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11649 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11650 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11651 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11652 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11653 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11654 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11655 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11656 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11657 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11658 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11659 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11660 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11661 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11662 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11663 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11664 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11665 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11666 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11667 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11669 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11670 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11671 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11672 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11674 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11675 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11676 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11677 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11679 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11680 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11681 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11683 /* SSE2 MMX */
11684 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11685 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11686 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11687 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11688 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11689 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11690 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11691 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11693 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11694 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11695 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11696 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11697 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11698 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11699 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11700 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11702 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11703 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11704 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11705 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11707 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11708 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11709 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11710 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11712 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11713 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11715 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11716 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11717 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11718 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11719 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11720 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11722 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11723 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11724 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11725 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11728 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11729 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11730 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11731 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11732 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11735 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11736 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11738 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11739 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11743 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11744 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11745 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11751 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11755 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11756 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11757 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11758 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11760 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11762 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11764 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
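/* Illustrative note (added commentary): each row above is a struct
   builtin_description of the form { mask, icode, name, code, comparison,
   flag }.  ix86_init_mmx_sse_builtins below walks the table and registers
   every named entry under the CPU feature bits in MASK; ICODE names the
   insn pattern used when a call to the builtin is expanded.  For example,

       { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128",
         IX86_BUILTIN_PADDB128, 0, 0 },

   makes __builtin_ia32_paddb128 available under -msse2 and expands calls
   to it through the addv16qi3 pattern.  COMPARISON and FLAG are only
   meaningful for the SSE compare entries (see ix86_expand_sse_compare).  */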
11767 static const struct builtin_description bdesc_1arg[] =
11769 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11770 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11772 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11773 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11774 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11776 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11779 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11781 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11782 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11783 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11785 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11787 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11788 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11790 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11791 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11792 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11793 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11794 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11796 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11801 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11802 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11803 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
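/* Note: the bdesc_1arg rows above carry a null name; the corresponding
   __builtin_ia32_* functions are declared by the explicit def_builtin
   calls further down (which supply hand-built prototypes), and the table
   then supplies the insn pattern when the calls are expanded.  */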
11806 void
11807 ix86_init_builtins ()
11809 if (TARGET_MMX)
11810 ix86_init_mmx_sse_builtins ();
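/* Illustrative example (assumed user-level code, not part of this file):
   once registration has run, code compiled with the matching -m options
   can call the builtins directly, e.g.

       typedef float __v4sf __attribute__ ((mode (V4SF)));

       __v4sf
       scale (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_mulps (a, b);
       }

   The mmintrin.h/xmmintrin.h intrinsics are thin inline wrappers around
   such __builtin_ia32_* calls.  */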
11813 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11814 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11815 builtins. */
11816 static void
11817 ix86_init_mmx_sse_builtins ()
11819 const struct builtin_description * d;
11820 size_t i;
11821 tree endlink = void_list_node;
11823 tree pchar_type_node = build_pointer_type (char_type_node);
11824 tree pfloat_type_node = build_pointer_type (float_type_node);
11825 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11826 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11827 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11829 /* Comparisons. */
11830 tree int_ftype_v4sf_v4sf
11831 = build_function_type (integer_type_node,
11832 tree_cons (NULL_TREE, V4SF_type_node,
11833 tree_cons (NULL_TREE,
11834 V4SF_type_node,
11835 endlink)));
11836 tree v4si_ftype_v4sf_v4sf
11837 = build_function_type (V4SI_type_node,
11838 tree_cons (NULL_TREE, V4SF_type_node,
11839 tree_cons (NULL_TREE,
11840 V4SF_type_node,
11841 endlink)));
11842 /* MMX/SSE/integer conversions. */
11843 tree int_ftype_v4sf
11844 = build_function_type (integer_type_node,
11845 tree_cons (NULL_TREE, V4SF_type_node,
11846 endlink));
11847 tree int_ftype_v8qi
11848 = build_function_type (integer_type_node,
11849 tree_cons (NULL_TREE, V8QI_type_node,
11850 endlink));
11851 tree v4sf_ftype_v4sf_int
11852 = build_function_type (V4SF_type_node,
11853 tree_cons (NULL_TREE, V4SF_type_node,
11854 tree_cons (NULL_TREE, integer_type_node,
11855 endlink)));
11856 tree v4sf_ftype_v4sf_v2si
11857 = build_function_type (V4SF_type_node,
11858 tree_cons (NULL_TREE, V4SF_type_node,
11859 tree_cons (NULL_TREE, V2SI_type_node,
11860 endlink)));
11861 tree int_ftype_v4hi_int
11862 = build_function_type (integer_type_node,
11863 tree_cons (NULL_TREE, V4HI_type_node,
11864 tree_cons (NULL_TREE, integer_type_node,
11865 endlink)));
11866 tree v4hi_ftype_v4hi_int_int
11867 = build_function_type (V4HI_type_node,
11868 tree_cons (NULL_TREE, V4HI_type_node,
11869 tree_cons (NULL_TREE, integer_type_node,
11870 tree_cons (NULL_TREE,
11871 integer_type_node,
11872 endlink))));
11873 /* Miscellaneous. */
11874 tree v8qi_ftype_v4hi_v4hi
11875 = build_function_type (V8QI_type_node,
11876 tree_cons (NULL_TREE, V4HI_type_node,
11877 tree_cons (NULL_TREE, V4HI_type_node,
11878 endlink)));
11879 tree v4hi_ftype_v2si_v2si
11880 = build_function_type (V4HI_type_node,
11881 tree_cons (NULL_TREE, V2SI_type_node,
11882 tree_cons (NULL_TREE, V2SI_type_node,
11883 endlink)));
11884 tree v4sf_ftype_v4sf_v4sf_int
11885 = build_function_type (V4SF_type_node,
11886 tree_cons (NULL_TREE, V4SF_type_node,
11887 tree_cons (NULL_TREE, V4SF_type_node,
11888 tree_cons (NULL_TREE,
11889 integer_type_node,
11890 endlink))));
11891 tree v2si_ftype_v4hi_v4hi
11892 = build_function_type (V2SI_type_node,
11893 tree_cons (NULL_TREE, V4HI_type_node,
11894 tree_cons (NULL_TREE, V4HI_type_node,
11895 endlink)));
11896 tree v4hi_ftype_v4hi_int
11897 = build_function_type (V4HI_type_node,
11898 tree_cons (NULL_TREE, V4HI_type_node,
11899 tree_cons (NULL_TREE, integer_type_node,
11900 endlink)));
11901 tree v4hi_ftype_v4hi_di
11902 = build_function_type (V4HI_type_node,
11903 tree_cons (NULL_TREE, V4HI_type_node,
11904 tree_cons (NULL_TREE,
11905 long_long_integer_type_node,
11906 endlink)));
11907 tree v2si_ftype_v2si_di
11908 = build_function_type (V2SI_type_node,
11909 tree_cons (NULL_TREE, V2SI_type_node,
11910 tree_cons (NULL_TREE,
11911 long_long_integer_type_node,
11912 endlink)));
11913 tree void_ftype_void
11914 = build_function_type (void_type_node, endlink);
11915 tree void_ftype_unsigned
11916 = build_function_type (void_type_node,
11917 tree_cons (NULL_TREE, unsigned_type_node,
11918 endlink));
11919 tree unsigned_ftype_void
11920 = build_function_type (unsigned_type_node, endlink);
11921 tree di_ftype_void
11922 = build_function_type (long_long_unsigned_type_node, endlink);
11923 tree v4sf_ftype_void
11924 = build_function_type (V4SF_type_node, endlink);
11925 tree v2si_ftype_v4sf
11926 = build_function_type (V2SI_type_node,
11927 tree_cons (NULL_TREE, V4SF_type_node,
11928 endlink));
11929 /* Loads/stores. */
11930 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11931 tree_cons (NULL_TREE, V8QI_type_node,
11932 tree_cons (NULL_TREE,
11933 pchar_type_node,
11934 endlink)));
11935 tree void_ftype_v8qi_v8qi_pchar
11936 = build_function_type (void_type_node, maskmovq_args);
11937 tree v4sf_ftype_pfloat
11938 = build_function_type (V4SF_type_node,
11939 tree_cons (NULL_TREE, pfloat_type_node,
11940 endlink));
11941 /* @@@ the type is bogus */
11942 tree v4sf_ftype_v4sf_pv2si
11943 = build_function_type (V4SF_type_node,
11944 tree_cons (NULL_TREE, V4SF_type_node,
11945 tree_cons (NULL_TREE, pv2si_type_node,
11946 endlink)));
11947 tree void_ftype_pv2si_v4sf
11948 = build_function_type (void_type_node,
11949 tree_cons (NULL_TREE, pv2si_type_node,
11950 tree_cons (NULL_TREE, V4SF_type_node,
11951 endlink)));
11952 tree void_ftype_pfloat_v4sf
11953 = build_function_type (void_type_node,
11954 tree_cons (NULL_TREE, pfloat_type_node,
11955 tree_cons (NULL_TREE, V4SF_type_node,
11956 endlink)));
11957 tree void_ftype_pdi_di
11958 = build_function_type (void_type_node,
11959 tree_cons (NULL_TREE, pdi_type_node,
11960 tree_cons (NULL_TREE,
11961 long_long_unsigned_type_node,
11962 endlink)));
11963 tree void_ftype_pv2di_v2di
11964 = build_function_type (void_type_node,
11965 tree_cons (NULL_TREE, pv2di_type_node,
11966 tree_cons (NULL_TREE,
11967 V2DI_type_node,
11968 endlink)));
11969 /* Normal vector unops. */
11970 tree v4sf_ftype_v4sf
11971 = build_function_type (V4SF_type_node,
11972 tree_cons (NULL_TREE, V4SF_type_node,
11973 endlink));
11975 /* Normal vector binops. */
11976 tree v4sf_ftype_v4sf_v4sf
11977 = build_function_type (V4SF_type_node,
11978 tree_cons (NULL_TREE, V4SF_type_node,
11979 tree_cons (NULL_TREE, V4SF_type_node,
11980 endlink)));
11981 tree v8qi_ftype_v8qi_v8qi
11982 = build_function_type (V8QI_type_node,
11983 tree_cons (NULL_TREE, V8QI_type_node,
11984 tree_cons (NULL_TREE, V8QI_type_node,
11985 endlink)));
11986 tree v4hi_ftype_v4hi_v4hi
11987 = build_function_type (V4HI_type_node,
11988 tree_cons (NULL_TREE, V4HI_type_node,
11989 tree_cons (NULL_TREE, V4HI_type_node,
11990 endlink)));
11991 tree v2si_ftype_v2si_v2si
11992 = build_function_type (V2SI_type_node,
11993 tree_cons (NULL_TREE, V2SI_type_node,
11994 tree_cons (NULL_TREE, V2SI_type_node,
11995 endlink)));
11996 tree di_ftype_di_di
11997 = build_function_type (long_long_unsigned_type_node,
11998 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11999 tree_cons (NULL_TREE,
12000 long_long_unsigned_type_node,
12001 endlink)));
12003 tree v2si_ftype_v2sf
12004 = build_function_type (V2SI_type_node,
12005 tree_cons (NULL_TREE, V2SF_type_node,
12006 endlink));
12007 tree v2sf_ftype_v2si
12008 = build_function_type (V2SF_type_node,
12009 tree_cons (NULL_TREE, V2SI_type_node,
12010 endlink));
12011 tree v2si_ftype_v2si
12012 = build_function_type (V2SI_type_node,
12013 tree_cons (NULL_TREE, V2SI_type_node,
12014 endlink));
12015 tree v2sf_ftype_v2sf
12016 = build_function_type (V2SF_type_node,
12017 tree_cons (NULL_TREE, V2SF_type_node,
12018 endlink));
12019 tree v2sf_ftype_v2sf_v2sf
12020 = build_function_type (V2SF_type_node,
12021 tree_cons (NULL_TREE, V2SF_type_node,
12022 tree_cons (NULL_TREE,
12023 V2SF_type_node,
12024 endlink)));
12025 tree v2si_ftype_v2sf_v2sf
12026 = build_function_type (V2SI_type_node,
12027 tree_cons (NULL_TREE, V2SF_type_node,
12028 tree_cons (NULL_TREE,
12029 V2SF_type_node,
12030 endlink)));
12031 tree pint_type_node = build_pointer_type (integer_type_node);
12032 tree pdouble_type_node = build_pointer_type (double_type_node);
12033 tree int_ftype_v2df_v2df
12034 = build_function_type (integer_type_node,
12035 tree_cons (NULL_TREE, V2DF_type_node,
12036 tree_cons (NULL_TREE, V2DF_type_node, endlink)));
12038 tree ti_ftype_void
12039 = build_function_type (intTI_type_node, endlink);
12040 tree ti_ftype_ti_ti
12041 = build_function_type (intTI_type_node,
12042 tree_cons (NULL_TREE, intTI_type_node,
12043 tree_cons (NULL_TREE, intTI_type_node,
12044 endlink)));
12045 tree void_ftype_pvoid
12046 = build_function_type (void_type_node,
12047 tree_cons (NULL_TREE, ptr_type_node, endlink));
12048 tree v2di_ftype_di
12049 = build_function_type (V2DI_type_node,
12050 tree_cons (NULL_TREE, long_long_unsigned_type_node,
12051 endlink));
12052 tree v4sf_ftype_v4si
12053 = build_function_type (V4SF_type_node,
12054 tree_cons (NULL_TREE, V4SI_type_node, endlink));
12055 tree v4si_ftype_v4sf
12056 = build_function_type (V4SI_type_node,
12057 tree_cons (NULL_TREE, V4SF_type_node, endlink));
12058 tree v2df_ftype_v4si
12059 = build_function_type (V2DF_type_node,
12060 tree_cons (NULL_TREE, V4SI_type_node, endlink));
12061 tree v4si_ftype_v2df
12062 = build_function_type (V4SI_type_node,
12063 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12064 tree v2si_ftype_v2df
12065 = build_function_type (V2SI_type_node,
12066 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12067 tree v4sf_ftype_v2df
12068 = build_function_type (V4SF_type_node,
12069 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12070 tree v2df_ftype_v2si
12071 = build_function_type (V2DF_type_node,
12072 tree_cons (NULL_TREE, V2SI_type_node, endlink));
12073 tree v2df_ftype_v4sf
12074 = build_function_type (V2DF_type_node,
12075 tree_cons (NULL_TREE, V4SF_type_node, endlink));
12076 tree int_ftype_v2df
12077 = build_function_type (integer_type_node,
12078 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12079 tree v2df_ftype_v2df_int
12080 = build_function_type (V2DF_type_node,
12081 tree_cons (NULL_TREE, V2DF_type_node,
12082 tree_cons (NULL_TREE, integer_type_node,
12083 endlink)));
12084 tree v4sf_ftype_v4sf_v2df
12085 = build_function_type (V4SF_type_node,
12086 tree_cons (NULL_TREE, V4SF_type_node,
12087 tree_cons (NULL_TREE, V2DF_type_node,
12088 endlink)));
12089 tree v2df_ftype_v2df_v4sf
12090 = build_function_type (V2DF_type_node,
12091 tree_cons (NULL_TREE, V2DF_type_node,
12092 tree_cons (NULL_TREE, V4SF_type_node,
12093 endlink)));
12094 tree v2df_ftype_v2df_v2df_int
12095 = build_function_type (V2DF_type_node,
12096 tree_cons (NULL_TREE, V2DF_type_node,
12097 tree_cons (NULL_TREE, V2DF_type_node,
12098 tree_cons (NULL_TREE,
12099 integer_type_node,
12100 endlink))));
12101 tree v2df_ftype_v2df_pv2si
12102 = build_function_type (V2DF_type_node,
12103 tree_cons (NULL_TREE, V2DF_type_node,
12104 tree_cons (NULL_TREE, pv2si_type_node,
12105 endlink)));
12106 tree void_ftype_pv2si_v2df
12107 = build_function_type (void_type_node,
12108 tree_cons (NULL_TREE, pv2si_type_node,
12109 tree_cons (NULL_TREE, V2DF_type_node,
12110 endlink)));
12111 tree void_ftype_pdouble_v2df
12112 = build_function_type (void_type_node,
12113 tree_cons (NULL_TREE, pdouble_type_node,
12114 tree_cons (NULL_TREE, V2DF_type_node,
12115 endlink)));
12116 tree void_ftype_pint_int
12117 = build_function_type (void_type_node,
12118 tree_cons (NULL_TREE, pint_type_node,
12119 tree_cons (NULL_TREE, integer_type_node,
12120 endlink)));
12121 tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
12122 tree_cons (NULL_TREE, V16QI_type_node,
12123 tree_cons (NULL_TREE,
12124 pchar_type_node,
12125 endlink)));
12126 tree void_ftype_v16qi_v16qi_pchar
12127 = build_function_type (void_type_node, maskmovdqu_args);
12128 tree v2df_ftype_pdouble
12129 = build_function_type (V2DF_type_node,
12130 tree_cons (NULL_TREE, pdouble_type_node,
12131 endlink));
12132 tree v2df_ftype_v2df_v2df
12133 = build_function_type (V2DF_type_node,
12134 tree_cons (NULL_TREE, V2DF_type_node,
12135 tree_cons (NULL_TREE, V2DF_type_node,
12136 endlink)));
12137 tree v16qi_ftype_v16qi_v16qi
12138 = build_function_type (V16QI_type_node,
12139 tree_cons (NULL_TREE, V16QI_type_node,
12140 tree_cons (NULL_TREE, V16QI_type_node,
12141 endlink)));
12142 tree v8hi_ftype_v8hi_v8hi
12143 = build_function_type (V8HI_type_node,
12144 tree_cons (NULL_TREE, V8HI_type_node,
12145 tree_cons (NULL_TREE, V8HI_type_node,
12146 endlink)));
12147 tree v4si_ftype_v4si_v4si
12148 = build_function_type (V4SI_type_node,
12149 tree_cons (NULL_TREE, V4SI_type_node,
12150 tree_cons (NULL_TREE, V4SI_type_node,
12151 endlink)));
12152 tree v2di_ftype_v2di_v2di
12153 = build_function_type (V2DI_type_node,
12154 tree_cons (NULL_TREE, V2DI_type_node,
12155 tree_cons (NULL_TREE, V2DI_type_node,
12156 endlink)));
12157 tree v2di_ftype_v2df_v2df
12158 = build_function_type (V2DI_type_node,
12159 tree_cons (NULL_TREE, V2DF_type_node,
12160 tree_cons (NULL_TREE, V2DF_type_node,
12161 endlink)));
12162 tree v2df_ftype_v2df
12163 = build_function_type (V2DF_type_node,
12164 tree_cons (NULL_TREE, V2DF_type_node,
12165 endlink));
12166 tree v2df_ftype_double
12167 = build_function_type (V2DF_type_node,
12168 tree_cons (NULL_TREE, double_type_node,
12169 endlink));
12170 tree v2df_ftype_double_double
12171 = build_function_type (V2DF_type_node,
12172 tree_cons (NULL_TREE, double_type_node,
12173 tree_cons (NULL_TREE, double_type_node,
12174 endlink)));
12175 tree int_ftype_v8hi_int
12176 = build_function_type (integer_type_node,
12177 tree_cons (NULL_TREE, V8HI_type_node,
12178 tree_cons (NULL_TREE, integer_type_node,
12179 endlink)));
12180 tree v8hi_ftype_v8hi_int_int
12181 = build_function_type (V8HI_type_node,
12182 tree_cons (NULL_TREE, V8HI_type_node,
12183 tree_cons (NULL_TREE, integer_type_node,
12184 tree_cons (NULL_TREE,
12185 integer_type_node,
12186 endlink))));
12187 tree v2di_ftype_v2di_int
12188 = build_function_type (V2DI_type_node,
12189 tree_cons (NULL_TREE, V2DI_type_node,
12190 tree_cons (NULL_TREE, integer_type_node,
12191 endlink)));
12192 tree v4si_ftype_v4si_int
12193 = build_function_type (V4SI_type_node,
12194 tree_cons (NULL_TREE, V4SI_type_node,
12195 tree_cons (NULL_TREE, integer_type_node,
12196 endlink)));
12197 tree v8hi_ftype_v8hi_int
12198 = build_function_type (V8HI_type_node,
12199 tree_cons (NULL_TREE, V8HI_type_node,
12200 tree_cons (NULL_TREE, integer_type_node,
12201 endlink)));
12202 tree v8hi_ftype_v8hi_v2di
12203 = build_function_type (V8HI_type_node,
12204 tree_cons (NULL_TREE, V8HI_type_node,
12205 tree_cons (NULL_TREE, V2DI_type_node,
12206 endlink)));
12207 tree v4si_ftype_v4si_v2di
12208 = build_function_type (V4SI_type_node,
12209 tree_cons (NULL_TREE, V4SI_type_node,
12210 tree_cons (NULL_TREE, V2DI_type_node,
12211 endlink)));
12212 tree v4si_ftype_v8hi_v8hi
12213 = build_function_type (V4SI_type_node,
12214 tree_cons (NULL_TREE, V8HI_type_node,
12215 tree_cons (NULL_TREE, V8HI_type_node,
12216 endlink)));
12217 tree di_ftype_v8qi_v8qi
12218 = build_function_type (long_long_unsigned_type_node,
12219 tree_cons (NULL_TREE, V8QI_type_node,
12220 tree_cons (NULL_TREE, V8QI_type_node,
12221 endlink)));
12222 tree v2di_ftype_v16qi_v16qi
12223 = build_function_type (V2DI_type_node,
12224 tree_cons (NULL_TREE, V16QI_type_node,
12225 tree_cons (NULL_TREE, V16QI_type_node,
12226 endlink)));
12227 tree int_ftype_v16qi
12228 = build_function_type (integer_type_node,
12229 tree_cons (NULL_TREE, V16QI_type_node, endlink));
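/* Illustrative note: each of the *_ftype_* trees above encodes a C
   prototype as a tree_cons chain of argument types terminated by
   void_list_node (endlink); v4sf_ftype_v4sf_v4sf, for instance, stands
   for

       V4SF f (V4SF, V4SF);

   and is handed to def_builtin below so that calls to the corresponding
   __builtin_ia32_* function are checked like ordinary prototyped calls.  */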
12231 /* Add all builtins that are more or less simple operations on two
12232 operands. */
12233 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12235 /* Use one of the operands; the target can have a different mode for
12236 mask-generating compares. */
12237 enum machine_mode mode;
12238 tree type;
12240 if (d->name == 0)
12241 continue;
12242 mode = insn_data[d->icode].operand[1].mode;
12244 switch (mode)
12246 case V16QImode:
12247 type = v16qi_ftype_v16qi_v16qi;
12248 break;
12249 case V8HImode:
12250 type = v8hi_ftype_v8hi_v8hi;
12251 break;
12252 case V4SImode:
12253 type = v4si_ftype_v4si_v4si;
12254 break;
12255 case V2DImode:
12256 type = v2di_ftype_v2di_v2di;
12257 break;
12258 case V2DFmode:
12259 type = v2df_ftype_v2df_v2df;
12260 break;
12261 case TImode:
12262 type = ti_ftype_ti_ti;
12263 break;
12264 case V4SFmode:
12265 type = v4sf_ftype_v4sf_v4sf;
12266 break;
12267 case V8QImode:
12268 type = v8qi_ftype_v8qi_v8qi;
12269 break;
12270 case V4HImode:
12271 type = v4hi_ftype_v4hi_v4hi;
12272 break;
12273 case V2SImode:
12274 type = v2si_ftype_v2si_v2si;
12275 break;
12276 case DImode:
12277 type = di_ftype_di_di;
12278 break;
12280 default:
12281 abort ();
12284 /* Override for comparisons. */
12285 if (d->icode == CODE_FOR_maskcmpv4sf3
12286 || d->icode == CODE_FOR_maskncmpv4sf3
12287 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12288 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12289 type = v4si_ftype_v4sf_v4sf;
12291 if (d->icode == CODE_FOR_maskcmpv2df3
12292 || d->icode == CODE_FOR_maskncmpv2df3
12293 || d->icode == CODE_FOR_vmmaskcmpv2df3
12294 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12295 type = v2di_ftype_v2df_v2df;
12297 def_builtin (d->mask, d->name, type, d->code);
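/* To make the loop above concrete: for the bdesc_2arg entry naming
   "__builtin_ia32_paddw128" (CODE_FOR_addv8hi3), operand 1 of the insn
   has mode V8HImode, so the builtin is registered with the
   v8hi_ftype_v8hi_v8hi prototype.  The mask-generating compares are the
   only entries whose result mode differs from the operand mode, which is
   what the two overrides just above handle.  */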
12300 /* Add the remaining MMX insns with somewhat more complicated types. */
12301 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12302 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12303 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12304 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12305 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12306 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12307 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12309 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12310 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12311 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12313 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12314 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12316 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12317 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12319 /* comi/ucomi insns. */
12320 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12321 if (d->mask == MASK_SSE2)
12322 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12323 else
12324 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
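/* The comi/ucomi builtins all return int (a condition derived from the
   flags set by a scalar compare), so only the vector argument type
   differs between the SSE (v4sf) and SSE2 (v2df) variants registered
   here.  */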
12326 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12327 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12328 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12330 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12331 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12332 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12333 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12334 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12335 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12337 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12338 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12339 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12340 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12342 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12343 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12345 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12347 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12348 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12349 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12350 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12351 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12352 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12354 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12355 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12356 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12357 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12359 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12360 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12361 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12362 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12364 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12366 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12368 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12369 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12370 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12371 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12372 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12373 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12375 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12377 /* Original 3DNow! */
12378 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12379 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12380 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12381 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12382 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12383 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12384 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12385 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12386 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12387 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12388 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12389 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12390 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12391 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12392 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12393 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12394 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12395 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12396 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12397 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12399 /* 3DNow! extension as used in the Athlon CPU. */
12400 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12401 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12402 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12403 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12404 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12405 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12407 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12409 /* SSE2 */
12410 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12411 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12413 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12414 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12416 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12417 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12418 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12419 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12420 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12421 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12423 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12424 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12425 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12426 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12428 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12430 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12431 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12434 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12435 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12436 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12439 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12440 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12445 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12447 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12450 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12451 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12455 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12456 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12458 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12459 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12460 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12462 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12463 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12464 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12466 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12467 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12468 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12469 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12470 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12471 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12472 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12474 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12475 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12476 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12478 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12479 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12480 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12483 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12484 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12486 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12490 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12491 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12494 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12498 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
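/* All of the registrations above funnel through def_builtin (defined
   earlier in this file), which creates the function only when the
   feature bits in its MASK argument are enabled; the builtins declared
   here therefore exist only under the corresponding -mmmx/-msse/-msse2/
   -m3dnow options.  */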
12503 /* Errors in the source file can cause expand_expr to return const0_rtx
12504 where we expect a vector. To avoid crashing, use one of the vector
12505 clear instructions. */
12506 static rtx
12507 safe_vector_operand (x, mode)
12508 rtx x;
12509 enum machine_mode mode;
12511 if (x != const0_rtx)
12512 return x;
12513 x = gen_reg_rtx (mode);
12515 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12516 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12517 : gen_rtx_SUBREG (DImode, x, 0)));
12518 else
12519 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12520 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12521 return x;
12524 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12526 static rtx
12527 ix86_expand_binop_builtin (icode, arglist, target)
12528 enum insn_code icode;
12529 tree arglist;
12530 rtx target;
12532 rtx pat;
12533 tree arg0 = TREE_VALUE (arglist);
12534 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12535 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12536 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12537 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12538 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12539 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12541 if (VECTOR_MODE_P (mode0))
12542 op0 = safe_vector_operand (op0, mode0);
12543 if (VECTOR_MODE_P (mode1))
12544 op1 = safe_vector_operand (op1, mode1);
12546 if (! target
12547 || GET_MODE (target) != tmode
12548 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12549 target = gen_reg_rtx (tmode);
12551 /* In case the insn wants input operands in modes different from
12552 the result, abort. */
12553 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12554 abort ();
12556 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12557 op0 = copy_to_mode_reg (mode0, op0);
12558 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12559 op1 = copy_to_mode_reg (mode1, op1);
12561 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12562 yet one of the two must not be a memory. This is normally enforced
12563 by expanders, but we didn't bother to create one here. */
12564 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12565 op0 = copy_to_mode_reg (mode0, op0);
12567 pat = GEN_FCN (icode) (target, op0, op1);
12568 if (! pat)
12569 return 0;
12570 emit_insn (pat);
12571 return target;
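/* A minimal sketch of how this expander is reached: a user call such as
   __builtin_ia32_pfadd (a, b) arrives at ix86_expand_builtin below, which
   dispatches

       return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

   so this routine only has to fetch the two arguments, coerce them into
   the operand modes the named pattern expects, and emit the insn.  */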
12574 /* In type_for_mode we restrict the ability to create TImode types
12575 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12576 to have a V4SFmode signature. Convert them in-place to TImode. */
12578 static rtx
12579 ix86_expand_timode_binop_builtin (icode, arglist, target)
12580 enum insn_code icode;
12581 tree arglist;
12582 rtx target;
12584 rtx pat;
12585 tree arg0 = TREE_VALUE (arglist);
12586 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12587 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12588 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12590 op0 = gen_lowpart (TImode, op0);
12591 op1 = gen_lowpart (TImode, op1);
12592 target = gen_reg_rtx (TImode);
12594 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12595 op0 = copy_to_mode_reg (TImode, op0);
12596 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12597 op1 = copy_to_mode_reg (TImode, op1);
12599 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12600 yet one of the two must not be a memory. This is normally enforced
12601 by expanders, but we didn't bother to create one here. */
12602 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12603 op0 = copy_to_mode_reg (TImode, op0);
12605 pat = GEN_FCN (icode) (target, op0, op1);
12606 if (! pat)
12607 return 0;
12608 emit_insn (pat);
12610 return gen_lowpart (V4SFmode, target);
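/* Concretely: __builtin_ia32_andps is declared with the V4SF prototype
   (v4sf_ftype_v4sf_v4sf), but ix86_expand_builtin routes it here with
   CODE_FOR_sse_andti3.  The operands are reinterpreted as TImode via
   gen_lowpart, the logical insn is emitted in TImode, and the result is
   handed back as V4SFmode again.  */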
12613 /* Subroutine of ix86_expand_builtin to take care of stores. */
12615 static rtx
12616 ix86_expand_store_builtin (icode, arglist)
12617 enum insn_code icode;
12618 tree arglist;
12620 rtx pat;
12621 tree arg0 = TREE_VALUE (arglist);
12622 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12623 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12624 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12625 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12626 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12628 if (VECTOR_MODE_P (mode1))
12629 op1 = safe_vector_operand (op1, mode1);
12631 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12633 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12634 op1 = copy_to_mode_reg (mode1, op1);
12636 pat = GEN_FCN (icode) (op0, op1);
12637 if (pat)
12638 emit_insn (pat);
12639 return 0;
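/* Example of a store builtin handled here: __builtin_ia32_storeaps takes
   (float *, v4sf); the pointer argument becomes the MEM destination
   (op0) and the vector argument the source (op1), expanded through
   CODE_FOR_sse_movaps.  The builtin returns void, hence the constant 0
   result.  */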
12642 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12644 static rtx
12645 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12646 enum insn_code icode;
12647 tree arglist;
12648 rtx target;
12649 int do_load;
12651 rtx pat;
12652 tree arg0 = TREE_VALUE (arglist);
12653 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12654 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12655 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12657 if (! target
12658 || GET_MODE (target) != tmode
12659 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12660 target = gen_reg_rtx (tmode);
12661 if (do_load)
12662 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12663 else
12665 if (VECTOR_MODE_P (mode0))
12666 op0 = safe_vector_operand (op0, mode0);
12668 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12669 op0 = copy_to_mode_reg (mode0, op0);
12672 pat = GEN_FCN (icode) (target, op0);
12673 if (! pat)
12674 return 0;
12675 emit_insn (pat);
12676 return target;
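/* DO_LOAD distinguishes the two flavours of one-argument builtin: when
   nonzero the argument is a pointer and is wrapped in a MEM (the load
   builtins such as __builtin_ia32_loadaps), otherwise the argument is
   the vector value itself (e.g. __builtin_ia32_pf2id).  */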
12679 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12680 sqrtss, rsqrtss, rcpss. */
12682 static rtx
12683 ix86_expand_unop1_builtin (icode, arglist, target)
12684 enum insn_code icode;
12685 tree arglist;
12686 rtx target;
12688 rtx pat;
12689 tree arg0 = TREE_VALUE (arglist);
12690 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12691 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12692 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12694 if (! target
12695 || GET_MODE (target) != tmode
12696 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12697 target = gen_reg_rtx (tmode);
12699 if (VECTOR_MODE_P (mode0))
12700 op0 = safe_vector_operand (op0, mode0);
12702 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12703 op0 = copy_to_mode_reg (mode0, op0);
12705 op1 = op0;
12706 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12707 op1 = copy_to_mode_reg (mode0, op1);
12709 pat = GEN_FCN (icode) (target, op0, op1);
12710 if (! pat)
12711 return 0;
12712 emit_insn (pat);
12713 return target;
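/* The vm* patterns used for sqrtss/rsqrtss/rcpss take two vector
   operands even though the builtin takes one: the operation is applied
   to the low element while the remaining elements are copied from the
   second operand, so the single argument is simply supplied twice
   (op1 = op0 above) and the upper elements pass through unchanged.  */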
12716 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12718 static rtx
12719 ix86_expand_sse_compare (d, arglist, target)
12720 const struct builtin_description *d;
12721 tree arglist;
12722 rtx target;
12724 rtx pat;
12725 tree arg0 = TREE_VALUE (arglist);
12726 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12727 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12728 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12729 rtx op2;
12730 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12731 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12732 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12733 enum rtx_code comparison = d->comparison;
12735 if (VECTOR_MODE_P (mode0))
12736 op0 = safe_vector_operand (op0, mode0);
12737 if (VECTOR_MODE_P (mode1))
12738 op1 = safe_vector_operand (op1, mode1);
12740 /* Swap operands if we have a comparison that isn't available in
12741 hardware. */
12742 if (d->flag)
12744 rtx tmp = gen_reg_rtx (mode1);
12745 emit_move_insn (tmp, op1);
12746 op1 = op0;
12747 op0 = tmp;
12750 if (! target
12751 || GET_MODE (target) != tmode
12752 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12753 target = gen_reg_rtx (tmode);
12755 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12756 op0 = copy_to_mode_reg (mode0, op0);
12757 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12758 op1 = copy_to_mode_reg (mode1, op1);
12760 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12761 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12762 if (! pat)
12763 return 0;
12764 emit_insn (pat);
12765 return target;
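/* D->FLAG marks compare entries whose condition has no direct SSE
   encoding (a "greater than" packed compare, for example): for those the
   table records the reversed condition in D->COMPARISON and the two
   operands are swapped here, so the same cmpps/cmppd patterns can be
   reused.  */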
12768 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12770 static rtx
12771 ix86_expand_sse_comi (d, arglist, target)
12772 const struct builtin_description *d;
12773 tree arglist;
12774 rtx target;
12776 rtx pat;
12777 tree arg0 = TREE_VALUE (arglist);
12778 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12779 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12780 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12781 rtx op2;
12782 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12783 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12784 enum rtx_code comparison = d->comparison;
12786 if (VECTOR_MODE_P (mode0))
12787 op0 = safe_vector_operand (op0, mode0);
12788 if (VECTOR_MODE_P (mode1))
12789 op1 = safe_vector_operand (op1, mode1);
12791 /* Swap operands if we have a comparison that isn't available in
12792 hardware. */
12793 if (d->flag)
12795 rtx tmp = op1;
12796 op1 = op0;
12797 op0 = tmp;
12800 target = gen_reg_rtx (SImode);
12801 emit_move_insn (target, const0_rtx);
12802 target = gen_rtx_SUBREG (QImode, target, 0);
12804 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12805 op0 = copy_to_mode_reg (mode0, op0);
12806 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12807 op1 = copy_to_mode_reg (mode1, op1);
12809 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12810 pat = GEN_FCN (d->icode) (op0, op1, op2);
12811 if (! pat)
12812 return 0;
12813 emit_insn (pat);
12814 emit_insn (gen_rtx_SET (VOIDmode,
12815 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12816 gen_rtx_fmt_ee (comparison, QImode,
12817 gen_rtx_REG (CCmode, FLAGS_REG),
12818 const0_rtx)));
12820 return SUBREG_REG (target);
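/* Unlike the packed compares, the comi/ucomi insns only set the flags,
   so the sequence above materializes the boolean by hand: TARGET is
   zeroed, the compare is emitted, and a setcc-style assignment to the
   QImode low part of TARGET turns the flag condition into a 0/1 value
   returned in SImode.  */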
12823 /* Expand an expression EXP that calls a built-in function,
12824 with result going to TARGET if that's convenient
12825 (and in mode MODE if that's convenient).
12826 SUBTARGET may be used as the target for computing one of EXP's operands.
12827 IGNORE is nonzero if the value is to be ignored. */
12829 rtx
12830 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12831 tree exp;
12832 rtx target;
12833 rtx subtarget ATTRIBUTE_UNUSED;
12834 enum machine_mode mode ATTRIBUTE_UNUSED;
12835 int ignore ATTRIBUTE_UNUSED;
12837 const struct builtin_description *d;
12838 size_t i;
12839 enum insn_code icode;
12840 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12841 tree arglist = TREE_OPERAND (exp, 1);
12842 tree arg0, arg1, arg2;
12843 rtx op0, op1, op2, pat;
12844 enum machine_mode tmode, mode0, mode1, mode2;
12845 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12847 switch (fcode)
12849 case IX86_BUILTIN_EMMS:
12850 emit_insn (gen_emms ());
12851 return 0;
12853 case IX86_BUILTIN_SFENCE:
12854 emit_insn (gen_sfence ());
12855 return 0;
12857 case IX86_BUILTIN_PEXTRW:
12858 case IX86_BUILTIN_PEXTRW128:
12859 icode = (fcode == IX86_BUILTIN_PEXTRW
12860 ? CODE_FOR_mmx_pextrw
12861 : CODE_FOR_sse2_pextrw);
12862 arg0 = TREE_VALUE (arglist);
12863 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12864 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12865 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12866 tmode = insn_data[icode].operand[0].mode;
12867 mode0 = insn_data[icode].operand[1].mode;
12868 mode1 = insn_data[icode].operand[2].mode;
12870 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12871 op0 = copy_to_mode_reg (mode0, op0);
12872 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12874 /* @@@ better error message */
12875 error ("selector must be an immediate");
12876 return gen_reg_rtx (tmode);
12878 if (target == 0
12879 || GET_MODE (target) != tmode
12880 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12881 target = gen_reg_rtx (tmode);
12882 pat = GEN_FCN (icode) (target, op0, op1);
12883 if (! pat)
12884 return 0;
12885 emit_insn (pat);
12886 return target;
12888 case IX86_BUILTIN_PINSRW:
12889 case IX86_BUILTIN_PINSRW128:
12890 icode = (fcode == IX86_BUILTIN_PINSRW
12891 ? CODE_FOR_mmx_pinsrw
12892 : CODE_FOR_sse2_pinsrw);
12893 arg0 = TREE_VALUE (arglist);
12894 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12895 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12896 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12897 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12898 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12899 tmode = insn_data[icode].operand[0].mode;
12900 mode0 = insn_data[icode].operand[1].mode;
12901 mode1 = insn_data[icode].operand[2].mode;
12902 mode2 = insn_data[icode].operand[3].mode;
12904 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12905 op0 = copy_to_mode_reg (mode0, op0);
12906 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12907 op1 = copy_to_mode_reg (mode1, op1);
12908 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12910 /* @@@ better error message */
12911 error ("selector must be an immediate");
12912 return const0_rtx;
12914 if (target == 0
12915 || GET_MODE (target) != tmode
12916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12917 target = gen_reg_rtx (tmode);
12918 pat = GEN_FCN (icode) (target, op0, op1, op2);
12919 if (! pat)
12920 return 0;
12921 emit_insn (pat);
12922 return target;
12924 case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
12925 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12926 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12927 : CODE_FOR_sse2_maskmovdqu);
12928 /* Note the arg order is different from the operand order. */
12929 arg1 = TREE_VALUE (arglist);
12930 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12931 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12932 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12933 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12934 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12935 mode0 = insn_data[icode].operand[0].mode;
12936 mode1 = insn_data[icode].operand[1].mode;
12937 mode2 = insn_data[icode].operand[2].mode;
12939 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12940 op0 = copy_to_mode_reg (mode0, op0);
12941 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12942 op1 = copy_to_mode_reg (mode1, op1);
12943 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12944 op2 = copy_to_mode_reg (mode2, op2);
12945 pat = GEN_FCN (icode) (op0, op1, op2);
12946 if (! pat)
12947 return 0;
12948 emit_insn (pat);
12949 return 0;
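/* Roughly, __builtin_ia32_maskmovq (data, mask, p) stores those bytes of
   DATA whose corresponding MASK byte has its high bit set to the address
   P.  The builtin's argument order (data, mask, pointer) differs from
   the insn's operand order (pointer, data, mask), which is what the
   reordering of ARG0/ARG1/ARG2 above compensates for.  */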
12951 case IX86_BUILTIN_SQRTSS:
12952 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12953 case IX86_BUILTIN_RSQRTSS:
12954 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12955 case IX86_BUILTIN_RCPSS:
12956 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12958 case IX86_BUILTIN_ANDPS:
12959 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12960 arglist, target);
12961 case IX86_BUILTIN_ANDNPS:
12962 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12963 arglist, target);
12964 case IX86_BUILTIN_ORPS:
12965 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12966 arglist, target);
12967 case IX86_BUILTIN_XORPS:
12968 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12969 arglist, target);
12971 case IX86_BUILTIN_LOADAPS:
12972 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12974 case IX86_BUILTIN_LOADUPS:
12975 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12977 case IX86_BUILTIN_STOREAPS:
12978 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12979 case IX86_BUILTIN_STOREUPS:
12980 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12982 case IX86_BUILTIN_LOADSS:
12983 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12985 case IX86_BUILTIN_STORESS:
12986 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12988 case IX86_BUILTIN_LOADHPS:
12989 case IX86_BUILTIN_LOADLPS:
12990 case IX86_BUILTIN_LOADHPD:
12991 case IX86_BUILTIN_LOADLPD:
12992 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12993 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12994 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12995 : CODE_FOR_sse2_movlpd);
12996 arg0 = TREE_VALUE (arglist);
12997 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12998 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12999 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13000 tmode = insn_data[icode].operand[0].mode;
13001 mode0 = insn_data[icode].operand[1].mode;
13002 mode1 = insn_data[icode].operand[2].mode;
13004 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13005 op0 = copy_to_mode_reg (mode0, op0);
13006 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13007 if (target == 0
13008 || GET_MODE (target) != tmode
13009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13010 target = gen_reg_rtx (tmode);
13011 pat = GEN_FCN (icode) (target, op0, op1);
13012 if (! pat)
13013 return 0;
13014 emit_insn (pat);
13015 return target;
13017 case IX86_BUILTIN_STOREHPS:
13018 case IX86_BUILTIN_STORELPS:
13019 case IX86_BUILTIN_STOREHPD:
13020 case IX86_BUILTIN_STORELPD:
13021 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13022 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13023 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13024 : CODE_FOR_sse2_movlpd);
13025 arg0 = TREE_VALUE (arglist);
13026 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13027 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13028 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13029 mode0 = insn_data[icode].operand[1].mode;
13030 mode1 = insn_data[icode].operand[2].mode;
13032 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13033 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13034 op1 = copy_to_mode_reg (mode1, op1);
13036 pat = GEN_FCN (icode) (op0, op0, op1);
13037 if (! pat)
13038 return 0;
13039 emit_insn (pat);
13040 return 0;
13042 case IX86_BUILTIN_MOVNTPS:
13043 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13044 case IX86_BUILTIN_MOVNTQ:
13045 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13047 case IX86_BUILTIN_LDMXCSR:
13048 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13049 target = assign_386_stack_local (SImode, 0);
13050 emit_move_insn (target, op0);
13051 emit_insn (gen_ldmxcsr (target));
13052 return 0;
13054 case IX86_BUILTIN_STMXCSR:
13055 target = assign_386_stack_local (SImode, 0);
13056 emit_insn (gen_stmxcsr (target));
13057 return copy_to_mode_reg (SImode, target);
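/* ldmxcsr and stmxcsr only accept a 32-bit memory operand, so both cases
   above bounce the value through a stack slot obtained from
   assign_386_stack_local rather than using a register directly.  */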
13059 case IX86_BUILTIN_SHUFPS:
13060 case IX86_BUILTIN_SHUFPD:
13061 icode = (fcode == IX86_BUILTIN_SHUFPS
13062 ? CODE_FOR_sse_shufps
13063 : CODE_FOR_sse2_shufpd);
13064 arg0 = TREE_VALUE (arglist);
13065 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13066 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13067 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13068 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13069 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13070 tmode = insn_data[icode].operand[0].mode;
13071 mode0 = insn_data[icode].operand[1].mode;
13072 mode1 = insn_data[icode].operand[2].mode;
13073 mode2 = insn_data[icode].operand[3].mode;
13075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13076 op0 = copy_to_mode_reg (mode0, op0);
13077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13078 op1 = copy_to_mode_reg (mode1, op1);
13079 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13081 /* @@@ better error message */
13082 error ("mask must be an immediate");
13083 return gen_reg_rtx (tmode);
13085 if (target == 0
13086 || GET_MODE (target) != tmode
13087 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13088 target = gen_reg_rtx (tmode);
13089 pat = GEN_FCN (icode) (target, op0, op1, op2);
13090 if (! pat)
13091 return 0;
13092 emit_insn (pat);
13093 return target;
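/* The pshuf* builtins are unary shuffles: a single vector input plus
   an immediate ordering mask, which again must be a compile-time
   constant.  */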
13095 case IX86_BUILTIN_PSHUFW:
13096 case IX86_BUILTIN_PSHUFD:
13097 case IX86_BUILTIN_PSHUFHW:
13098 case IX86_BUILTIN_PSHUFLW:
13099 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13100 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13101 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13102 : CODE_FOR_mmx_pshufw);
13103 arg0 = TREE_VALUE (arglist);
13104 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13105 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13106 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13107 tmode = insn_data[icode].operand[0].mode;
13108 mode1 = insn_data[icode].operand[1].mode;
13109 mode2 = insn_data[icode].operand[2].mode;
13111 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13112 op0 = copy_to_mode_reg (mode1, op0);
13113 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13115 /* @@@ better error message */
13116 error ("mask must be an immediate");
13117 return const0_rtx;
13119 if (target == 0
13120 || GET_MODE (target) != tmode
13121 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13122 target = gen_reg_rtx (tmode);
13123 pat = GEN_FCN (icode) (target, op0, op1);
13124 if (! pat)
13125 return 0;
13126 emit_insn (pat);
13127 return target;
13129 case IX86_BUILTIN_FEMMS:
13130 emit_insn (gen_femms ());
13131 return NULL_RTX;
13133 case IX86_BUILTIN_PAVGUSB:
13134 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13136 case IX86_BUILTIN_PF2ID:
13137 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13139 case IX86_BUILTIN_PFACC:
13140 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13142 case IX86_BUILTIN_PFADD:
13143 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13145 case IX86_BUILTIN_PFCMPEQ:
13146 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13148 case IX86_BUILTIN_PFCMPGE:
13149 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13151 case IX86_BUILTIN_PFCMPGT:
13152 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13154 case IX86_BUILTIN_PFMAX:
13155 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13157 case IX86_BUILTIN_PFMIN:
13158 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13160 case IX86_BUILTIN_PFMUL:
13161 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13163 case IX86_BUILTIN_PFRCP:
13164 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13166 case IX86_BUILTIN_PFRCPIT1:
13167 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13169 case IX86_BUILTIN_PFRCPIT2:
13170 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13172 case IX86_BUILTIN_PFRSQIT1:
13173 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13175 case IX86_BUILTIN_PFRSQRT:
13176 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13178 case IX86_BUILTIN_PFSUB:
13179 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13181 case IX86_BUILTIN_PFSUBR:
13182 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13184 case IX86_BUILTIN_PI2FD:
13185 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13187 case IX86_BUILTIN_PMULHRW:
13188 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13190 case IX86_BUILTIN_PF2IW:
13191 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13193 case IX86_BUILTIN_PFNACC:
13194 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13196 case IX86_BUILTIN_PFPNACC:
13197 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13199 case IX86_BUILTIN_PI2FW:
13200 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13202 case IX86_BUILTIN_PSWAPDSI:
13203 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13205 case IX86_BUILTIN_PSWAPDSF:
13206 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
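/* Build all-zero SSE and MMX values by emitting the clear patterns
   into fresh registers.  */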
13208 case IX86_BUILTIN_SSE_ZERO:
13209 target = gen_reg_rtx (V4SFmode);
13210 emit_insn (gen_sse_clrv4sf (target));
13211 return target;
13213 case IX86_BUILTIN_MMX_ZERO:
13214 target = gen_reg_rtx (DImode);
13215 emit_insn (gen_mmx_clrdi (target));
13216 return target;
13218 case IX86_BUILTIN_SQRTSD:
13219 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13220 case IX86_BUILTIN_LOADAPD:
13221 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13222 case IX86_BUILTIN_LOADUPD:
13223 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13225 case IX86_BUILTIN_STOREAPD:
13226 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13227 case IX86_BUILTIN_STOREUPD:
13228 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13230 case IX86_BUILTIN_LOADSD:
13231 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13233 case IX86_BUILTIN_STORESD:
13234 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
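/* SETPD1 splats a single double: spill it to a stack slot, load it
   into the low element of a V2DF register and duplicate it with
   shufpd.  SETPD stores both scalars to the slot and loads the pair
   with movapd.  */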
13236 case IX86_BUILTIN_SETPD1:
13237 target = assign_386_stack_local (DFmode, 0);
13238 arg0 = TREE_VALUE (arglist);
13239 emit_move_insn (adjust_address (target, DFmode, 0),
13240 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13241 op0 = gen_reg_rtx (V2DFmode);
13242 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13243 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13244 return op0;
13246 case IX86_BUILTIN_SETPD:
13247 target = assign_386_stack_local (V2DFmode, 0);
13248 arg0 = TREE_VALUE (arglist);
13249 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13250 emit_move_insn (adjust_address (target, DFmode, 0),
13251 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13252 emit_move_insn (adjust_address (target, DFmode, 8),
13253 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13254 op0 = gen_reg_rtx (V2DFmode);
13255 emit_insn (gen_sse2_movapd (op0, target));
13256 return op0;
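/* LOADRPD loads the pair and swaps the two halves with shufpd;
   LOADPD1 loads one double and duplicates it into both halves.  */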
13258 case IX86_BUILTIN_LOADRPD:
13259 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13260 gen_reg_rtx (V2DFmode), 1);
13261 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13262 return target;
13264 case IX86_BUILTIN_LOADPD1:
13265 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13266 gen_reg_rtx (V2DFmode), 1);
13267 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13268 return target;
13270 case IX86_BUILTIN_STOREPD1:
13271 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13272 case IX86_BUILTIN_STORERPD:
13273 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13275 case IX86_BUILTIN_MFENCE:
13276 emit_insn (gen_sse2_mfence ());
13277 return 0;
13278 case IX86_BUILTIN_LFENCE:
13279 emit_insn (gen_sse2_lfence ());
13280 return 0;
13282 case IX86_BUILTIN_CLFLUSH:
13283 arg0 = TREE_VALUE (arglist);
13284 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13285 icode = CODE_FOR_sse2_clflush;
13286 mode0 = insn_data[icode].operand[0].mode;
13287 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13288 op0 = copy_to_mode_reg (mode0, op0);
13290 emit_insn (gen_sse2_clflush (op0));
13291 return 0;
13293 case IX86_BUILTIN_MOVNTPD:
13294 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13295 case IX86_BUILTIN_MOVNTDQ:
13296 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13297 case IX86_BUILTIN_MOVNTI:
13298 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13300 default:
13301 break;
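/* None of the special cases matched: look the builtin up in the
   generic two-operand, one-operand and comparison tables.  */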
13304 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13305 if (d->code == fcode)
13307 /* Compares are treated specially. */
13308 if (d->icode == CODE_FOR_maskcmpv4sf3
13309 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13310 || d->icode == CODE_FOR_maskncmpv4sf3
13311 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13312 || d->icode == CODE_FOR_maskcmpv2df3
13313 || d->icode == CODE_FOR_vmmaskcmpv2df3
13314 || d->icode == CODE_FOR_maskncmpv2df3
13315 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13316 return ix86_expand_sse_compare (d, arglist, target);
13318 return ix86_expand_binop_builtin (d->icode, arglist, target);
13321 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13322 if (d->code == fcode)
13323 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13325 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13326 if (d->code == fcode)
13327 return ix86_expand_sse_comi (d, arglist, target);
13329 /* @@@ Should really do something sensible here. */
13330 return 0;
13333 /* Store OPERAND to memory after reload has completed.  This means
13334 that we can't easily use assign_stack_local.  */
13335 rtx
13336 ix86_force_to_memory (mode, operand)
13337 enum machine_mode mode;
13338 rtx operand;
13340 rtx result;
13341 if (!reload_completed)
13342 abort ();
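/* With a red zone we can store below the stack pointer without
   adjusting it.  */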
13343 if (TARGET_64BIT && TARGET_RED_ZONE)
13345 result = gen_rtx_MEM (mode,
13346 gen_rtx_PLUS (Pmode,
13347 stack_pointer_rtx,
13348 GEN_INT (-RED_ZONE_SIZE)));
13349 emit_move_insn (result, operand);
13351 else if (TARGET_64BIT && !TARGET_RED_ZONE)
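/* Without a red zone, push the operand; the result is the slot that
   is now at the top of the stack.  */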
13353 switch (mode)
13355 case HImode:
13356 case SImode:
13357 operand = gen_lowpart (DImode, operand);
13358 /* FALLTHRU */
13359 case DImode:
13360 emit_insn (
13361 gen_rtx_SET (VOIDmode,
13362 gen_rtx_MEM (DImode,
13363 gen_rtx_PRE_DEC (DImode,
13364 stack_pointer_rtx)),
13365 operand));
13366 break;
13367 default:
13368 abort ();
13370 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13372 else
13374 switch (mode)
13376 case DImode:
13378 rtx operands[2];
13379 split_di (&operand, 1, operands, operands + 1);
13380 emit_insn (
13381 gen_rtx_SET (VOIDmode,
13382 gen_rtx_MEM (SImode,
13383 gen_rtx_PRE_DEC (Pmode,
13384 stack_pointer_rtx)),
13385 operands[1]));
13386 emit_insn (
13387 gen_rtx_SET (VOIDmode,
13388 gen_rtx_MEM (SImode,
13389 gen_rtx_PRE_DEC (Pmode,
13390 stack_pointer_rtx)),
13391 operands[0]));
13393 break;
13394 case HImode:
13395 /* It is better to store HImodes as SImodes. */
13396 if (!TARGET_PARTIAL_REG_STALL)
13397 operand = gen_lowpart (SImode, operand);
13398 /* FALLTHRU */
13399 case SImode:
13400 emit_insn (
13401 gen_rtx_SET (VOIDmode,
13402 gen_rtx_MEM (GET_MODE (operand),
13403 gen_rtx_PRE_DEC (SImode,
13404 stack_pointer_rtx)),
13405 operand));
13406 break;
13407 default:
13408 abort ();
13410 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13412 return result;
13415 /* Release the stack space that ix86_force_to_memory allocated for MODE.  */
13416 void
13417 ix86_free_from_memory (mode)
13418 enum machine_mode mode;
13420 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13422 int size;
13424 if (mode == DImode || TARGET_64BIT)
13425 size = 8;
13426 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13427 size = 2;
13428 else
13429 size = 4;
13430 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
13431 to a pop or add instruction if registers are available.  */
13432 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13433 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13434 GEN_INT (size))));
13438 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13439 QImode must go into class Q_REGS.
13440 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13441 movdf to do mem-to-mem moves through integer regs. */
13442 enum reg_class
13443 ix86_preferred_reload_class (x, class)
13444 rtx x;
13445 enum reg_class class;
13447 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13449 /* SSE can't load any constant directly yet. */
13450 if (SSE_CLASS_P (class))
13451 return NO_REGS;
13452 /* Floats can load 0 and 1. */
13453 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13455 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13456 if (MAYBE_SSE_CLASS_P (class))
13457 return (reg_class_subset_p (class, GENERAL_REGS)
13458 ? GENERAL_REGS : FLOAT_REGS);
13459 else
13460 return class;
13462 /* General regs can load everything. */
13463 if (reg_class_subset_p (class, GENERAL_REGS))
13464 return GENERAL_REGS;
13465 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13466 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13467 return NO_REGS;
13469 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13470 return NO_REGS;
13471 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13472 return Q_REGS;
13473 return class;
13476 /* If we are copying between general and FP registers, we need a memory
13477 location. The same is true for SSE and MMX registers.
13479 The macro can't work reliably when one of the CLASSES is a class containing
13480 registers from multiple units (SSE, MMX, integer).  We avoid this by never
13481 combining those units in a single alternative in the machine description.
13482 Ensure that this constraint holds to avoid surprises.
13484 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13485 enforce these sanity checks. */
13486 int
13487 ix86_secondary_memory_needed (class1, class2, mode, strict)
13488 enum reg_class class1, class2;
13489 enum machine_mode mode;
13490 int strict;
13492 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13493 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13494 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13495 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13496 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13497 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13499 if (strict)
13500 abort ();
13501 else
13502 return 1;
13504 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13505 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13506 && (mode) != SImode)
13507 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13508 && (mode) != SImode));
13510 /* Return the cost of moving data from a register in class CLASS1 to
13511 one in class CLASS2.
13513 It is not required that the cost always equal 2 when FROM is the same as TO;
13514 on some machines it is expensive to move between registers if they are not
13515 general registers. */
13516 int
13517 ix86_register_move_cost (mode, class1, class2)
13518 enum machine_mode mode;
13519 enum reg_class class1, class2;
13521 /* In case we require secondary memory, compute the cost of the store followed
13522 by the load.  When copying from a general purpose register we may emit
13523 multiple stores followed by a single load, causing a memory size mismatch
13524 stall.  Count this as an arbitrarily high cost of 20.  */
13525 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13527 int add_cost = 0;
13528 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13529 add_cost = 20;
13530 return (MEMORY_MOVE_COST (mode, class1, 0)
13531 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13533 /* Moves between SSE/MMX and integer unit are expensive. */
13534 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13535 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13536 return ix86_cost->mmxsse_to_integer;
13537 if (MAYBE_FLOAT_CLASS_P (class1))
13538 return ix86_cost->fp_move;
13539 if (MAYBE_SSE_CLASS_P (class1))
13540 return ix86_cost->sse_move;
13541 if (MAYBE_MMX_CLASS_P (class1))
13542 return ix86_cost->mmx_move;
13543 return 2;
13546 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13547 int
13548 ix86_hard_regno_mode_ok (regno, mode)
13549 int regno;
13550 enum machine_mode mode;
13552 /* Only the flags registers can hold CCmode values, and they can hold only CCmode values.  */
13553 if (CC_REGNO_P (regno))
13554 return GET_MODE_CLASS (mode) == MODE_CC;
13555 if (GET_MODE_CLASS (mode) == MODE_CC
13556 || GET_MODE_CLASS (mode) == MODE_RANDOM
13557 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13558 return 0;
13559 if (FP_REGNO_P (regno))
13560 return VALID_FP_MODE_P (mode);
13561 if (SSE_REGNO_P (regno))
13562 return VALID_SSE_REG_MODE (mode);
13563 if (MMX_REGNO_P (regno))
13564 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13565 /* We handle both integers and floats in the general purpose registers.
13566 In the future we should be able to handle vector modes as well.  */
13567 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13568 return 0;
13569 /* Take care with QImode values: they can live in non-QI regs, but then
13570 they cause partial register stalls.  */
13571 if (regno < 4 || mode != QImode || TARGET_64BIT)
13572 return 1;
13573 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13576 /* Return the cost of moving data of mode M between a
13577 register and memory. A value of 2 is the default; this cost is
13578 relative to those in `REGISTER_MOVE_COST'.
13580 If moving between registers and memory is more expensive than
13581 between two registers, you should define this macro to express the
13582 relative cost.
13584 Also model the increased cost of moving QImode registers in
13585 non-Q_REGS classes.  */
13587 int
13588 ix86_memory_move_cost (mode, class, in)
13589 enum machine_mode mode;
13590 enum reg_class class;
13591 int in;
13593 if (FLOAT_CLASS_P (class))
13595 int index;
13596 switch (mode)
13598 case SFmode:
13599 index = 0;
13600 break;
13601 case DFmode:
13602 index = 1;
13603 break;
13604 case XFmode:
13605 case TFmode:
13606 index = 2;
13607 break;
13608 default:
13609 return 100;
13611 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13613 if (SSE_CLASS_P (class))
13615 int index;
13616 switch (GET_MODE_SIZE (mode))
13618 case 4:
13619 index = 0;
13620 break;
13621 case 8:
13622 index = 1;
13623 break;
13624 case 16:
13625 index = 2;
13626 break;
13627 default:
13628 return 100;
13630 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13632 if (MMX_CLASS_P (class))
13634 int index;
13635 switch (GET_MODE_SIZE (mode))
13637 case 4:
13638 index = 0;
13639 break;
13640 case 8:
13641 index = 1;
13642 break;
13643 default:
13644 return 100;
13646 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13648 switch (GET_MODE_SIZE (mode))
13650 case 1:
13651 if (in)
13652 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13653 : ix86_cost->movzbl_load);
13654 else
13655 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13656 : ix86_cost->int_store[0] + 4);
13657 break;
13658 case 2:
13659 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13660 default:
13661 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
13662 if (mode == TFmode)
13663 mode = XFmode;
13664 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13665 * (int) GET_MODE_SIZE (mode) / 4);
13669 #ifdef DO_GLOBAL_CTORS_BODY
13670 static void
13671 ix86_svr3_asm_out_constructor (symbol, priority)
13672 rtx symbol;
13673 int priority ATTRIBUTE_UNUSED;
13675 init_section ();
13676 fputs ("\tpushl $", asm_out_file);
13677 assemble_name (asm_out_file, XSTR (symbol, 0));
13678 fputc ('\n', asm_out_file);
13680 #endif
13682 /* Order the registers for the register allocator.  */
13684 void
13685 x86_order_regs_for_local_alloc ()
13687 int pos = 0;
13688 int i;
13690 /* First allocate the local general purpose registers. */
13691 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13692 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13693 reg_alloc_order [pos++] = i;
13695 /* Global general purpose registers. */
13696 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13697 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13698 reg_alloc_order [pos++] = i;
13700 /* x87 registers come first in case we are doing FP math
13701 using them. */
13702 if (!TARGET_SSE_MATH)
13703 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13704 reg_alloc_order [pos++] = i;
13706 /* SSE registers. */
13707 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13708 reg_alloc_order [pos++] = i;
13709 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13710 reg_alloc_order [pos++] = i;
13712 /* x87 registers.  */
13713 if (TARGET_SSE_MATH)
13714 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13715 reg_alloc_order [pos++] = i;
13717 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13718 reg_alloc_order [pos++] = i;
13720 /* Initialize the rest of the array, as we do not allocate some registers
13721 at all.  */
13722 while (pos < FIRST_PSEUDO_REGISTER)
13723 reg_alloc_order [pos++] = 0;
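/* Output the assembly for a thunk: add DELTA to the `this' pointer
   (the first argument, in a register or on the stack) and then jump
   to FUNCTION.  */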
13726 void
13727 x86_output_mi_thunk (file, delta, function)
13728 FILE *file;
13729 int delta;
13730 tree function;
13732 tree parm;
13733 rtx xops[3];
13735 if (ix86_regparm > 0)
13736 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13737 else
13738 parm = NULL_TREE;
13739 for (; parm; parm = TREE_CHAIN (parm))
13740 if (TREE_VALUE (parm) == void_type_node)
13741 break;
13743 xops[0] = GEN_INT (delta);
13744 if (TARGET_64BIT)
13746 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13747 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13748 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13749 if (flag_pic)
13751 fprintf (file, "\tjmp *");
13752 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13753 fprintf (file, "@GOTPCREL(%%rip)\n");
13755 else
13757 fprintf (file, "\tjmp ");
13758 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13759 fprintf (file, "\n");
13762 else
13764 if (parm)
13765 xops[1] = gen_rtx_REG (SImode, 0);
13766 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13767 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13768 else
13769 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13770 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
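/* For PIC the target address must come from the GOT: save %ebx,
   compute the GOT pointer in it with a call/pop/add sequence, load
   FUNCTION's entry into %ecx, restore %ebx and jump through %ecx.  */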
13772 if (flag_pic)
13774 xops[0] = pic_offset_table_rtx;
13775 xops[1] = gen_label_rtx ();
13776 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13778 if (ix86_regparm > 2)
13779 abort ();
13780 output_asm_insn ("push{l}\t%0", xops);
13781 output_asm_insn ("call\t%P1", xops);
13782 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13783 output_asm_insn ("pop{l}\t%0", xops);
13784 output_asm_insn
13785 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13786 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13787 output_asm_insn
13788 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13789 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13790 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13792 else
13794 fprintf (file, "\tjmp ");
13795 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13796 fprintf (file, "\n");
13801 #include "gt-i386.h"