/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;
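/* ix86_cost points at the table for the CPU being tuned for; it starts
   at pentium_cost and is reseated by override_options below.
   Illustrative sketch (an assumption, not code from this port): cost
   queries elsewhere in the backend read through this pointer, roughly

       int cost = COSTS_N_INSNS (ix86_cost->add);

   where the field name `add' is assumed from the struct processor_costs
   declaration in i386.h, which is not shown in this file.  */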
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
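/* Usage sketch: each x86_FOO word above is tested against the mask of
   the processor being tuned for.  i386.h is assumed to wrap the tests
   in TARGET_* macros along the lines of

       #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   with CPUMASK expanding to (1 << (int) ix86_cpu); compare the explicit
   test `x86_arch_always_fancy_math_387 & (1 << ix86_arch)' done in
   override_options below.  */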
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES;	/* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;	/* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;	/* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						      1 /*RDX*/, 2 /*RCX*/,
						      FIRST_REX_INT_REG /*R8 */,
						      FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
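/* Worked example: with the x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, X86_64_VARARGS_SIZE
   evaluates to 6*8 + 8*16 = 176 bytes, the size of the register save
   area the x86-64 ABI reserves for varargs functions.  */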
/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static void i386_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
						       HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

struct gcc_target targetm = TARGET_INITIALIZER;
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	/* Test the flags inside the loop body: indexing the table with
	   i == pta_size after a failed search would read past its end.  */
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
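  /* Worked example: -mpreferred-stack-boundary=4 gives i == 4, so the
     boundary becomes (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits,
     i.e. the 16 byte alignment wanted for SSE __m128 spills.  */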
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetic");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
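/* Usage sketch (user code, not part of the compiler): the attribute
   validated above is spelled

       int __attribute__ ((regparm (3))) f (int a, int b, int c);

   asking for up to three integer arguments in registers (%eax, %edx
   and %ecx in the ia32 convention) instead of on the stack.  */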
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = current_function_profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    {
      int nregs = ix86_regparm;

      if (funtype)
	{
	  tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));

	  if (attr)
	    nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	}

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
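/* Worked example: for
       void __attribute__ ((stdcall)) f (int a, int b);
   the argument list is fixed, so the function above returns SIZE
   (8 here) and the callee pops its own arguments with `ret $8'.
   A cdecl or varargs function yields 0 and the caller pops.  */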
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being `void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1592 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1593 of this code is to classify each 8bytes of incoming argument by the register
1594 class and assign registers accordingly. */
1596 /* Return the union class of CLASS1 and CLASS2.
1597 See the x86-64 PS ABI for details. */
1599 static enum x86_64_reg_class
1600 merge_classes (class1, class2)
1601 enum x86_64_reg_class class1, class2;
1603 /* Rule #1: If both classes are equal, this is the resulting class. */
1604 if (class1 == class2)
1605 return class1;
1607 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1608 the other class. */
1609 if (class1 == X86_64_NO_CLASS)
1610 return class2;
1611 if (class2 == X86_64_NO_CLASS)
1612 return class1;
1614 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1615 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1616 return X86_64_MEMORY_CLASS;
1618 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1619 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1620 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1621 return X86_64_INTEGERSI_CLASS;
1622 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1623 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1624 return X86_64_INTEGER_CLASS;
1626 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1627 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1628 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1629 return X86_64_MEMORY_CLASS;
1631 /* Rule #6: Otherwise class SSE is used. */
1632 return X86_64_SSE_CLASS;
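/* For instance, merging INTEGERSI with SSESF yields INTEGERSI by
   rule #4, while merging SSE with X87 forces MEMORY by rule #5.  */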
1635 /* Classify the argument of type TYPE and mode MODE.
1636 CLASSES will be filled by the register class used to pass each word
1637 of the operand. The number of words is returned. In case the parameter
1638 should be passed in memory, 0 is returned. As a special case for zero
1639 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1641 BIT_OFFSET is used internally for handling records; it specifies the
1642 offset in bits modulo 256 to avoid overflow cases.
1644 See the x86-64 PS ABI for details.  */
1647 static int
1648 classify_argument (mode, type, classes, bit_offset)
1649 enum machine_mode mode;
1650 tree type;
1651 enum x86_64_reg_class classes[MAX_CLASSES];
1652 int bit_offset;
1654 int bytes =
1655 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1656 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1658 if (type && AGGREGATE_TYPE_P (type))
1660 int i;
1661 tree field;
1662 enum x86_64_reg_class subclasses[MAX_CLASSES];
1664 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1665 if (bytes > 16)
1666 return 0;
1668 for (i = 0; i < words; i++)
1669 classes[i] = X86_64_NO_CLASS;
1671 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1672 signal the memory class, so handle this as a special case. */
1673 if (!words)
1675 classes[0] = X86_64_NO_CLASS;
1676 return 1;
1679 /* Classify each field of record and merge classes. */
1680 if (TREE_CODE (type) == RECORD_TYPE)
1682 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1684 if (TREE_CODE (field) == FIELD_DECL)
1686 int num;
1688 /* Bitfields are always classified as integer. Handle them
1689 early, since later code would consider them to be
1690 misaligned integers. */
1691 if (DECL_BIT_FIELD (field))
1693 for (i = int_bit_position (field) / 8 / 8;
1694 i < (int_bit_position (field)
1695 + tree_low_cst (DECL_SIZE (field), 0)
1696 + 63) / 8 / 8; i++)
1697 classes[i] =
1698 merge_classes (X86_64_INTEGER_CLASS,
1699 classes[i]);
1701 else
1703 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1704 TREE_TYPE (field), subclasses,
1705 (int_bit_position (field)
1706 + bit_offset) % 256);
1707 if (!num)
1708 return 0;
1709 for (i = 0; i < num; i++)
1711 int pos =
1712 (int_bit_position (field) + bit_offset) / 8 / 8;
1713 classes[i + pos] =
1714 merge_classes (subclasses[i], classes[i + pos]);
1720 /* Arrays are handled as small records. */
1721 else if (TREE_CODE (type) == ARRAY_TYPE)
1723 int num;
1724 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1725 TREE_TYPE (type), subclasses, bit_offset);
1726 if (!num)
1727 return 0;
1729 /* The partial classes are now full classes. */
1730 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1731 subclasses[0] = X86_64_SSE_CLASS;
1732 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1733 subclasses[0] = X86_64_INTEGER_CLASS;
1735 for (i = 0; i < words; i++)
1736 classes[i] = subclasses[i % num];
1738 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1739 else if (TREE_CODE (type) == UNION_TYPE
1740 || TREE_CODE (type) == QUAL_UNION_TYPE)
1742 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1744 if (TREE_CODE (field) == FIELD_DECL)
1746 int num;
1747 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1748 TREE_TYPE (field), subclasses,
1749 bit_offset);
1750 if (!num)
1751 return 0;
1752 for (i = 0; i < num; i++)
1753 classes[i] = merge_classes (subclasses[i], classes[i]);
1757 else
1758 abort ();
1760 /* Final merger cleanup. */
1761 for (i = 0; i < words; i++)
1763 /* If one class is MEMORY, everything should be passed in
1764 memory. */
1765 if (classes[i] == X86_64_MEMORY_CLASS)
1766 return 0;
1768 /* The X86_64_SSEUP_CLASS should always be preceded by
1769 X86_64_SSE_CLASS. */
1770 if (classes[i] == X86_64_SSEUP_CLASS
1771 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1772 classes[i] = X86_64_SSE_CLASS;
1774 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1775 if (classes[i] == X86_64_X87UP_CLASS
1776 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1777 classes[i] = X86_64_SSE_CLASS;
1779 return words;
1782 /* Compute the alignment needed. We align all types to natural boundaries, with
1783 the exception of XFmode, which is aligned to 64 bits. */
1784 if (mode != VOIDmode && mode != BLKmode)
1786 int mode_alignment = GET_MODE_BITSIZE (mode);
1788 if (mode == XFmode)
1789 mode_alignment = 128;
1790 else if (mode == XCmode)
1791 mode_alignment = 256;
1792 /* Misaligned fields are always returned in memory. */
1793 if (bit_offset % mode_alignment)
1794 return 0;
1797 /* Classification of atomic types. */
1798 switch (mode)
1800 case DImode:
1801 case SImode:
1802 case HImode:
1803 case QImode:
1804 case CSImode:
1805 case CHImode:
1806 case CQImode:
1807 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1808 classes[0] = X86_64_INTEGERSI_CLASS;
1809 else
1810 classes[0] = X86_64_INTEGER_CLASS;
1811 return 1;
1812 case CDImode:
1813 case TImode:
1814 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1815 return 2;
1816 case CTImode:
1817 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1818 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1819 return 4;
1820 case SFmode:
1821 if (!(bit_offset % 64))
1822 classes[0] = X86_64_SSESF_CLASS;
1823 else
1824 classes[0] = X86_64_SSE_CLASS;
1825 return 1;
1826 case DFmode:
1827 classes[0] = X86_64_SSEDF_CLASS;
1828 return 1;
1829 case TFmode:
1830 classes[0] = X86_64_X87_CLASS;
1831 classes[1] = X86_64_X87UP_CLASS;
1832 return 2;
1833 case TCmode:
1834 classes[0] = X86_64_X87_CLASS;
1835 classes[1] = X86_64_X87UP_CLASS;
1836 classes[2] = X86_64_X87_CLASS;
1837 classes[3] = X86_64_X87UP_CLASS;
1838 return 4;
1839 case DCmode:
1840 classes[0] = X86_64_SSEDF_CLASS;
1841 classes[1] = X86_64_SSEDF_CLASS;
1842 return 2;
1843 case SCmode:
1844 classes[0] = X86_64_SSE_CLASS;
1845 return 1;
1846 case V4SFmode:
1847 case V4SImode:
1848 classes[0] = X86_64_SSE_CLASS;
1849 classes[1] = X86_64_SSEUP_CLASS;
1850 return 2;
1851 case V2SFmode:
1852 case V2SImode:
1853 case V4HImode:
1854 case V8QImode:
1855 classes[0] = X86_64_SSE_CLASS;
1856 return 1;
1857 case BLKmode:
1858 case VOIDmode:
1859 return 0;
1860 default:
1861 abort ();
1865 /* Examine the argument and set the number of registers required in each
1866 class. Return 0 iff the parameter should be passed in memory. */
1867 static int
1868 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1869 enum machine_mode mode;
1870 tree type;
1871 int *int_nregs, *sse_nregs;
1872 int in_return;
1874 enum x86_64_reg_class class[MAX_CLASSES];
1875 int n = classify_argument (mode, type, class, 0);
1877 *int_nregs = 0;
1878 *sse_nregs = 0;
1879 if (!n)
1880 return 0;
1881 for (n--; n >= 0; n--)
1882 switch (class[n])
1884 case X86_64_INTEGER_CLASS:
1885 case X86_64_INTEGERSI_CLASS:
1886 (*int_nregs)++;
1887 break;
1888 case X86_64_SSE_CLASS:
1889 case X86_64_SSESF_CLASS:
1890 case X86_64_SSEDF_CLASS:
1891 (*sse_nregs)++;
1892 break;
1893 case X86_64_NO_CLASS:
1894 case X86_64_SSEUP_CLASS:
1895 break;
1896 case X86_64_X87_CLASS:
1897 case X86_64_X87UP_CLASS:
1898 if (!in_return)
1899 return 0;
1900 break;
1901 case X86_64_MEMORY_CLASS:
1902 abort ();
1904 return 1;
1906 /* Construct a container for the argument as used by the GCC interface. See
1907 FUNCTION_ARG for the detailed description. */
1908 static rtx
1909 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1910 enum machine_mode mode;
1911 tree type;
1912 int in_return;
1913 int nintregs, nsseregs;
1914 const int * intreg;
1915 int sse_regno;
1917 enum machine_mode tmpmode;
1918 int bytes =
1919 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1920 enum x86_64_reg_class class[MAX_CLASSES];
1921 int n;
1922 int i;
1923 int nexps = 0;
1924 int needed_sseregs, needed_intregs;
1925 rtx exp[MAX_CLASSES];
1926 rtx ret;
1928 n = classify_argument (mode, type, class, 0);
1929 if (TARGET_DEBUG_ARG)
1931 if (!n)
1932 fprintf (stderr, "Memory class\n");
1933 else
1935 fprintf (stderr, "Classes:");
1936 for (i = 0; i < n; i++)
1938 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1940 fprintf (stderr, "\n");
1943 if (!n)
1944 return NULL;
1945 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1946 return NULL;
1947 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1948 return NULL;
1950 /* First construct the simple cases. Avoid SCmode, since we want to use
1951 a single register to pass this type. */
1952 if (n == 1 && mode != SCmode)
1953 switch (class[0])
1955 case X86_64_INTEGER_CLASS:
1956 case X86_64_INTEGERSI_CLASS:
1957 return gen_rtx_REG (mode, intreg[0]);
1958 case X86_64_SSE_CLASS:
1959 case X86_64_SSESF_CLASS:
1960 case X86_64_SSEDF_CLASS:
1961 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1962 case X86_64_X87_CLASS:
1963 return gen_rtx_REG (mode, FIRST_STACK_REG);
1964 case X86_64_NO_CLASS:
1965 /* Zero sized array, struct or class. */
1966 return NULL;
1967 default:
1968 abort ();
1970 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1971 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1972 if (n == 2
1973 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1974 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1975 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1976 && class[1] == X86_64_INTEGER_CLASS
1977 && (mode == CDImode || mode == TImode)
1978 && intreg[0] + 1 == intreg[1])
1979 return gen_rtx_REG (mode, intreg[0]);
1980 if (n == 4
1981 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1982 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1983 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1985 /* Otherwise figure out the entries of the PARALLEL. */
1986 for (i = 0; i < n; i++)
1988 switch (class[i])
1990 case X86_64_NO_CLASS:
1991 break;
1992 case X86_64_INTEGER_CLASS:
1993 case X86_64_INTEGERSI_CLASS:
1994 /* Merge TImodes on aligned occasions here too. */
1995 if (i * 8 + 8 > bytes)
1996 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1997 else if (class[i] == X86_64_INTEGERSI_CLASS)
1998 tmpmode = SImode;
1999 else
2000 tmpmode = DImode;
2001 /* We've requested a size (e.g. 24 bits) for which no integer mode exists. Use DImode. */
2002 if (tmpmode == BLKmode)
2003 tmpmode = DImode;
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (tmpmode, *intreg),
2006 GEN_INT (i*8));
2007 intreg++;
2008 break;
2009 case X86_64_SSESF_CLASS:
2010 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2011 gen_rtx_REG (SFmode,
2012 SSE_REGNO (sse_regno)),
2013 GEN_INT (i*8));
2014 sse_regno++;
2015 break;
2016 case X86_64_SSEDF_CLASS:
2017 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2018 gen_rtx_REG (DFmode,
2019 SSE_REGNO (sse_regno)),
2020 GEN_INT (i*8));
2021 sse_regno++;
2022 break;
2023 case X86_64_SSE_CLASS:
2024 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2025 tmpmode = TImode, i++;
2026 else
2027 tmpmode = DImode;
2028 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2029 gen_rtx_REG (tmpmode,
2030 SSE_REGNO (sse_regno)),
2031 GEN_INT (i*8));
2032 sse_regno++;
2033 break;
2034 default:
2035 abort ();
2038 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2039 for (i = 0; i < nexps; i++)
2040 XVECEXP (ret, 0, i) = exp [i];
2041 return ret;
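/* For the struct { double d; int a; int b; } example above, the
   PARALLEL built here holds a DFmode SSE register at byte offset 0
   and a DImode integer register at byte offset 8.  */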
2044 /* Update the data in CUM to advance over an argument
2045 of mode MODE and data type TYPE.
2046 (TYPE is null for libcalls where that information may not be available.) */
2048 void
2049 function_arg_advance (cum, mode, type, named)
2050 CUMULATIVE_ARGS *cum; /* current arg information */
2051 enum machine_mode mode; /* current arg mode */
2052 tree type; /* type of the argument or 0 if lib support */
2053 int named; /* whether or not the argument was named */
2055 int bytes =
2056 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2057 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2059 if (TARGET_DEBUG_ARG)
2060 fprintf (stderr,
2061 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2062 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2063 if (TARGET_64BIT)
2065 int int_nregs, sse_nregs;
2066 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2067 cum->words += words;
2068 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2070 cum->nregs -= int_nregs;
2071 cum->sse_nregs -= sse_nregs;
2072 cum->regno += int_nregs;
2073 cum->sse_regno += sse_nregs;
2075 else
2076 cum->words += words;
2078 else
2080 if (TARGET_SSE && mode == TImode)
2082 cum->sse_words += words;
2083 cum->sse_nregs -= 1;
2084 cum->sse_regno += 1;
2085 if (cum->sse_nregs <= 0)
2087 cum->sse_nregs = 0;
2088 cum->sse_regno = 0;
2091 else
2093 cum->words += words;
2094 cum->nregs -= words;
2095 cum->regno += words;
2097 if (cum->nregs <= 0)
2099 cum->nregs = 0;
2100 cum->regno = 0;
2104 return;
2107 /* Define where to put the arguments to a function.
2108 Value is zero to push the argument on the stack,
2109 or a hard register in which to store the argument.
2111 MODE is the argument's machine mode.
2112 TYPE is the data type of the argument (as a tree).
2113 This is null for libcalls where that information may
2114 not be available.
2115 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2116 the preceding args and about the function being called.
2117 NAMED is nonzero if this argument is a named parameter
2118 (otherwise it is an extra parameter matching an ellipsis). */
2121 function_arg (cum, mode, type, named)
2122 CUMULATIVE_ARGS *cum; /* current arg information */
2123 enum machine_mode mode; /* current arg mode */
2124 tree type; /* type of the argument or 0 if lib support */
2125 int named; /* != 0 for normal args, == 0 for ... args */
2127 rtx ret = NULL_RTX;
2128 int bytes =
2129 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2130 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2132 /* Handle a hidden AL argument containing the number of SSE registers used
2133 by varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2134 avoid any AL settings. */
2135 if (mode == VOIDmode)
2137 if (TARGET_64BIT)
2138 return GEN_INT (cum->maybe_vaarg
2139 ? (cum->sse_nregs < 0
2140 ? SSE_REGPARM_MAX
2141 : cum->sse_regno)
2142 : -1);
2143 else
2144 return constm1_rtx;
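/* E.g. for printf ("%f", 1.0) the caller sets %al to 1, the number of
   SSE registers actually used by the call; a 64-bit varargs callee
   uses this count to decide how many SSE registers need saving.  */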
2146 if (TARGET_64BIT)
2147 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2148 &x86_64_int_parameter_registers [cum->regno],
2149 cum->sse_regno);
2150 else
2151 switch (mode)
2153 /* For now, pass fp/complex values on the stack. */
2154 default:
2155 break;
2157 case BLKmode:
2158 case DImode:
2159 case SImode:
2160 case HImode:
2161 case QImode:
2162 if (words <= cum->nregs)
2163 ret = gen_rtx_REG (mode, cum->regno);
2164 break;
2165 case TImode:
2166 if (cum->sse_nregs)
2167 ret = gen_rtx_REG (mode, cum->sse_regno);
2168 break;
2171 if (TARGET_DEBUG_ARG)
2173 fprintf (stderr,
2174 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2175 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2177 if (ret)
2178 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2179 else
2180 fprintf (stderr, ", stack");
2182 fprintf (stderr, " )\n");
2185 return ret;
2188 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2189 and type. */
2192 ix86_function_arg_boundary (mode, type)
2193 enum machine_mode mode;
2194 tree type;
2196 int align;
2197 if (!TARGET_64BIT)
2198 return PARM_BOUNDARY;
2199 if (type)
2200 align = TYPE_ALIGN (type);
2201 else
2202 align = GET_MODE_ALIGNMENT (mode);
2203 if (align < PARM_BOUNDARY)
2204 align = PARM_BOUNDARY;
2205 if (align > 128)
2206 align = 128;
2207 return align;
2210 /* Return true if REGNO is a possible register number for a function value. */
2211 bool
2212 ix86_function_value_regno_p (regno)
2213 int regno;
2215 if (!TARGET_64BIT)
2217 return ((regno) == 0
2218 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2219 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2221 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2222 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2223 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2226 /* Define how to find the value returned by a function.
2227 VALTYPE is the data type of the value (as a tree).
2228 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2229 otherwise, FUNC is 0. */
2231 ix86_function_value (valtype)
2232 tree valtype;
2234 if (TARGET_64BIT)
2236 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2237 REGPARM_MAX, SSE_REGPARM_MAX,
2238 x86_64_int_return_registers, 0);
2239 /* For zero sized structures, construct_container returns NULL, but we need
2240 to keep the rest of the compiler happy by returning a meaningful value. */
2241 if (!ret)
2242 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2243 return ret;
2245 else
2246 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2249 /* Return nonzero iff TYPE is returned in memory. */
2251 ix86_return_in_memory (type)
2252 tree type;
2254 int needed_intregs, needed_sseregs;
2255 if (TARGET_64BIT)
2257 return !examine_argument (TYPE_MODE (type), type, 1,
2258 &needed_intregs, &needed_sseregs);
2260 else
2262 if (TYPE_MODE (type) == BLKmode
2263 || (VECTOR_MODE_P (TYPE_MODE (type))
2264 && int_size_in_bytes (type) == 8)
2265 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2266 && TYPE_MODE (type) != TFmode
2267 && !VECTOR_MODE_P (TYPE_MODE (type))))
2268 return 1;
2269 return 0;
2273 /* Define how to find the value returned by a library function
2274 assuming the value has mode MODE. */
2276 ix86_libcall_value (mode)
2277 enum machine_mode mode;
2279 if (TARGET_64BIT)
2281 switch (mode)
2283 case SFmode:
2284 case SCmode:
2285 case DFmode:
2286 case DCmode:
2287 return gen_rtx_REG (mode, FIRST_SSE_REG);
2288 case TFmode:
2289 case TCmode:
2290 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2291 default:
2292 return gen_rtx_REG (mode, 0);
2295 else
2296 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2299 /* Create the va_list data type. */
2301 tree
2302 ix86_build_va_list ()
2304 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2306 /* For i386 we use a plain pointer to the argument area. */
2307 if (!TARGET_64BIT)
2308 return build_pointer_type (char_type_node);
2310 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2311 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2313 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2314 unsigned_type_node);
2315 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2316 unsigned_type_node);
2317 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2318 ptr_type_node);
2319 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2320 ptr_type_node);
2322 DECL_FIELD_CONTEXT (f_gpr) = record;
2323 DECL_FIELD_CONTEXT (f_fpr) = record;
2324 DECL_FIELD_CONTEXT (f_ovf) = record;
2325 DECL_FIELD_CONTEXT (f_sav) = record;
2327 TREE_CHAIN (record) = type_decl;
2328 TYPE_NAME (record) = type_decl;
2329 TYPE_FIELDS (record) = f_gpr;
2330 TREE_CHAIN (f_gpr) = f_fpr;
2331 TREE_CHAIN (f_fpr) = f_ovf;
2332 TREE_CHAIN (f_ovf) = f_sav;
2334 layout_type (record);
2336 /* The correct type is an array type of one element. */
2337 return build_array_type (record, build_index_type (size_zero_node));
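/* The record built above corresponds to the x86-64 ABI's

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };

   with va_list defined as an array of one such element.  */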
2340 /* Perform any actions needed for a function that is receiving a
2341 variable number of arguments.
2343 CUM is as above.
2345 MODE and TYPE are the mode and type of the current parameter.
2347 PRETEND_SIZE is a variable that should be set to the amount of stack
2348 that must be pushed by the prologue to pretend that our caller pushed it.
2351 Normally, this macro will push all remaining incoming registers on the
2352 stack and set PRETEND_SIZE to the length of the registers pushed. */
2354 void
2355 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2356 CUMULATIVE_ARGS *cum;
2357 enum machine_mode mode;
2358 tree type;
2359 int *pretend_size ATTRIBUTE_UNUSED;
2360 int no_rtl;
2363 CUMULATIVE_ARGS next_cum;
2364 rtx save_area = NULL_RTX, mem;
2365 rtx label;
2366 rtx label_ref;
2367 rtx tmp_reg;
2368 rtx nsse_reg;
2369 int set;
2370 tree fntype;
2371 int stdarg_p;
2372 int i;
2374 if (!TARGET_64BIT)
2375 return;
2377 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2378 ix86_save_varrargs_registers = 1;
2380 fntype = TREE_TYPE (current_function_decl);
2381 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2382 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2383 != void_type_node));
2385 /* For varargs, we do not want to skip the dummy va_dcl argument.
2386 For stdargs, we do want to skip the last named argument. */
2387 next_cum = *cum;
2388 if (stdarg_p)
2389 function_arg_advance (&next_cum, mode, type, 1);
2391 if (!no_rtl)
2392 save_area = frame_pointer_rtx;
2394 set = get_varargs_alias_set ();
2396 for (i = next_cum.regno; i < ix86_regparm; i++)
2398 mem = gen_rtx_MEM (Pmode,
2399 plus_constant (save_area, i * UNITS_PER_WORD));
2400 set_mem_alias_set (mem, set);
2401 emit_move_insn (mem, gen_rtx_REG (Pmode,
2402 x86_64_int_parameter_registers[i]));
2405 if (next_cum.sse_nregs)
2407 /* Now emit code to save SSE registers. The AX parameter contains the number
2408 of SSE parameter registers used to call this function. We use the
2409 sse_prologue_save insn template, which produces a computed jump across
2410 the SSE saves. We need some preparation work to get this working. */
2412 label = gen_label_rtx ();
2413 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2415 /* Compute the address to jump to:
2416 label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes) */
2417 tmp_reg = gen_reg_rtx (Pmode);
2418 nsse_reg = gen_reg_rtx (Pmode);
2419 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2420 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2421 gen_rtx_MULT (Pmode, nsse_reg,
2422 GEN_INT (4))));
2423 if (next_cum.sse_regno)
2424 emit_move_insn
2425 (nsse_reg,
2426 gen_rtx_CONST (DImode,
2427 gen_rtx_PLUS (DImode,
2428 label_ref,
2429 GEN_INT (next_cum.sse_regno * 4))));
2430 else
2431 emit_move_insn (nsse_reg, label_ref);
2432 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2434 /* Compute the address of the memory block we save into. We always use a
2435 pointer pointing 127 bytes after the first byte to store - this is needed
2436 to keep each instruction's size limited to 4 bytes. */
2437 tmp_reg = gen_reg_rtx (Pmode);
2438 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2439 plus_constant (save_area,
2440 8 * REGPARM_MAX + 127)));
2441 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2442 set_mem_alias_set (mem, set);
2443 set_mem_align (mem, BITS_PER_WORD);
2445 /* And finally do the dirty job! */
2446 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2447 GEN_INT (next_cum.sse_regno), label));
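/* Roughly, the computed target lands 4 bytes before the label for
   each SSE register that still needs saving, so exactly the required
   movaps stores are executed; the 127-byte bias above keeps every
   store within a one-byte displacement, which is what makes each
   save insn exactly 4 bytes long.  */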
2452 /* Implement va_start. */
2454 void
2455 ix86_va_start (stdarg_p, valist, nextarg)
2456 int stdarg_p;
2457 tree valist;
2458 rtx nextarg;
2460 HOST_WIDE_INT words, n_gpr, n_fpr;
2461 tree f_gpr, f_fpr, f_ovf, f_sav;
2462 tree gpr, fpr, ovf, sav, t;
2464 /* Only the 64-bit target needs something special. */
2465 if (!TARGET_64BIT)
2467 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2468 return;
2471 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2472 f_fpr = TREE_CHAIN (f_gpr);
2473 f_ovf = TREE_CHAIN (f_fpr);
2474 f_sav = TREE_CHAIN (f_ovf);
2476 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2477 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2478 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2479 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2480 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2482 /* Count number of gp and fp argument registers used. */
2483 words = current_function_args_info.words;
2484 n_gpr = current_function_args_info.regno;
2485 n_fpr = current_function_args_info.sse_regno;
2487 if (TARGET_DEBUG_ARG)
2488 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2489 (int) words, (int) n_gpr, (int) n_fpr);
2491 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2492 build_int_2 (n_gpr * 8, 0));
2493 TREE_SIDE_EFFECTS (t) = 1;
2494 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2496 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2497 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2498 TREE_SIDE_EFFECTS (t) = 1;
2499 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2501 /* Find the overflow area. */
2502 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2503 if (words != 0)
2504 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2505 build_int_2 (words * UNITS_PER_WORD, 0));
2506 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2507 TREE_SIDE_EFFECTS (t) = 1;
2508 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2510 /* Find the register save area.
2511 The function prologue saves it right above the stack frame. */
2512 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2513 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2514 TREE_SIDE_EFFECTS (t) = 1;
2515 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
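/* Example: for f (int n, ...) entered with one named integer argument
   and no named SSE arguments, this sets gp_offset = 8 and
   fp_offset = 8 * REGPARM_MAX (i.e. 48), points overflow_arg_area at
   the first stack-passed argument, and points reg_save_area at the
   block saved by the prologue.  */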
2518 /* Implement va_arg. */
2520 ix86_va_arg (valist, type)
2521 tree valist, type;
2523 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2524 tree f_gpr, f_fpr, f_ovf, f_sav;
2525 tree gpr, fpr, ovf, sav, t;
2526 int size, rsize;
2527 rtx lab_false, lab_over = NULL_RTX;
2528 rtx addr_rtx, r;
2529 rtx container;
2531 /* Only the 64-bit target needs something special. */
2532 if (!TARGET_64BIT)
2534 return std_expand_builtin_va_arg (valist, type);
2537 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2538 f_fpr = TREE_CHAIN (f_gpr);
2539 f_ovf = TREE_CHAIN (f_fpr);
2540 f_sav = TREE_CHAIN (f_ovf);
2542 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2543 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2544 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2545 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2546 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2548 size = int_size_in_bytes (type);
2549 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2551 container = construct_container (TYPE_MODE (type), type, 0,
2552 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2554 /* Pull the value out of the saved registers ...  */
2557 addr_rtx = gen_reg_rtx (Pmode);
2559 if (container)
2561 rtx int_addr_rtx, sse_addr_rtx;
2562 int needed_intregs, needed_sseregs;
2563 int need_temp;
2565 lab_over = gen_label_rtx ();
2566 lab_false = gen_label_rtx ();
2568 examine_argument (TYPE_MODE (type), type, 0,
2569 &needed_intregs, &needed_sseregs);
2572 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2573 || TYPE_ALIGN (type) > 128);
2575 /* In case we are passing a structure, verify that it is a consecutive block
2576 on the register save area. If not, we need to do moves. */
2577 if (!need_temp && !REG_P (container))
2579 /* Verify that all registers are strictly consecutive.  */
2580 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2582 int i;
2584 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2586 rtx slot = XVECEXP (container, 0, i);
2587 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2588 || INTVAL (XEXP (slot, 1)) != i * 16)
2589 need_temp = 1;
2592 else
2594 int i;
2596 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2598 rtx slot = XVECEXP (container, 0, i);
2599 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2600 || INTVAL (XEXP (slot, 1)) != i * 8)
2601 need_temp = 1;
2605 if (!need_temp)
2607 int_addr_rtx = addr_rtx;
2608 sse_addr_rtx = addr_rtx;
2610 else
2612 int_addr_rtx = gen_reg_rtx (Pmode);
2613 sse_addr_rtx = gen_reg_rtx (Pmode);
2615 /* First ensure that we fit completely in registers. */
2616 if (needed_intregs)
2618 emit_cmp_and_jump_insns (expand_expr
2619 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2620 GEN_INT ((REGPARM_MAX - needed_intregs +
2621 1) * 8), GE, const1_rtx, SImode,
2622 1, lab_false);
2624 if (needed_sseregs)
2626 emit_cmp_and_jump_insns (expand_expr
2627 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2628 GEN_INT ((SSE_REGPARM_MAX -
2629 needed_sseregs + 1) * 16 +
2630 REGPARM_MAX * 8), GE, const1_rtx,
2631 SImode, 1, lab_false);
2634 /* Compute index to start of area used for integer regs. */
2635 if (needed_intregs)
2637 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2638 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2639 if (r != int_addr_rtx)
2640 emit_move_insn (int_addr_rtx, r);
2642 if (needed_sseregs)
2644 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2645 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2646 if (r != sse_addr_rtx)
2647 emit_move_insn (sse_addr_rtx, r);
2649 if (need_temp)
2651 int i;
2652 rtx mem;
2654 /* Never use the memory itself, as it has the alias set. */
2655 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2656 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2657 set_mem_alias_set (mem, get_varargs_alias_set ());
2658 set_mem_align (mem, BITS_PER_UNIT);
2660 for (i = 0; i < XVECLEN (container, 0); i++)
2662 rtx slot = XVECEXP (container, 0, i);
2663 rtx reg = XEXP (slot, 0);
2664 enum machine_mode mode = GET_MODE (reg);
2665 rtx src_addr;
2666 rtx src_mem;
2667 int src_offset;
2668 rtx dest_mem;
2670 if (SSE_REGNO_P (REGNO (reg)))
2672 src_addr = sse_addr_rtx;
2673 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2675 else
2677 src_addr = int_addr_rtx;
2678 src_offset = REGNO (reg) * 8;
2680 src_mem = gen_rtx_MEM (mode, src_addr);
2681 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2682 src_mem = adjust_address (src_mem, mode, src_offset);
2683 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2684 emit_move_insn (dest_mem, src_mem);
2688 if (needed_intregs)
2691 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2692 build_int_2 (needed_intregs * 8, 0));
2693 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2694 TREE_SIDE_EFFECTS (t) = 1;
2695 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2697 if (needed_sseregs)
2700 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2701 build_int_2 (needed_sseregs * 16, 0));
2702 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2703 TREE_SIDE_EFFECTS (t) = 1;
2704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2707 emit_jump_insn (gen_jump (lab_over));
2708 emit_barrier ();
2709 emit_label (lab_false);
2712 /* ... otherwise out of the overflow area. */
2714 /* Care for on-stack alignment if needed. */
2715 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2716 t = ovf;
2717 else
2719 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2720 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2721 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2723 t = save_expr (t);
2725 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2726 if (r != addr_rtx)
2727 emit_move_insn (addr_rtx, r);
2730 build (PLUS_EXPR, TREE_TYPE (t), t,
2731 build_int_2 (rsize * UNITS_PER_WORD, 0));
2732 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2733 TREE_SIDE_EFFECTS (t) = 1;
2734 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2736 if (container)
2737 emit_label (lab_over);
2739 return addr_rtx;
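/* Roughly, for a value needing a single integer register, the RTL
   emitted above behaves like

     if (ap->gp_offset >= 8 * REGPARM_MAX)
       {
         addr = ap->overflow_arg_area;        /. plus alignment fixup ./
         ap->overflow_arg_area += 8;
       }
     else
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }

   with the analogous fp_offset test for SSE-classed values.  */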
2742 /* Return nonzero if OP is a general operand representable on x86_64. */
2745 x86_64_general_operand (op, mode)
2746 rtx op;
2747 enum machine_mode mode;
2749 if (!TARGET_64BIT)
2750 return general_operand (op, mode);
2751 if (nonimmediate_operand (op, mode))
2752 return 1;
2753 return x86_64_sign_extended_value (op);
2756 /* Return nonzero if OP is a general operand representable on x86_64
2757 as either a sign-extended or zero-extended constant. */
2760 x86_64_szext_general_operand (op, mode)
2761 rtx op;
2762 enum machine_mode mode;
2764 if (!TARGET_64BIT)
2765 return general_operand (op, mode);
2766 if (nonimmediate_operand (op, mode))
2767 return 1;
2768 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2771 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2774 x86_64_nonmemory_operand (op, mode)
2775 rtx op;
2776 enum machine_mode mode;
2778 if (!TARGET_64BIT)
2779 return nonmemory_operand (op, mode);
2780 if (register_operand (op, mode))
2781 return 1;
2782 return x86_64_sign_extended_value (op);
2785 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2788 x86_64_movabs_operand (op, mode)
2789 rtx op;
2790 enum machine_mode mode;
2792 if (!TARGET_64BIT || !flag_pic)
2793 return nonmemory_operand (op, mode);
2794 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2795 return 1;
2796 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2797 return 1;
2798 return 0;
2801 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign-extended or zero-extended constant. */
2804 x86_64_szext_nonmemory_operand (op, mode)
2805 rtx op;
2806 enum machine_mode mode;
2808 if (!TARGET_64BIT)
2809 return nonmemory_operand (op, mode);
2810 if (register_operand (op, mode))
2811 return 1;
2812 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2815 /* Return nonzero if OP is immediate operand representable on x86_64. */
2818 x86_64_immediate_operand (op, mode)
2819 rtx op;
2820 enum machine_mode mode;
2822 if (!TARGET_64BIT)
2823 return immediate_operand (op, mode);
2824 return x86_64_sign_extended_value (op);
2827 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant. */
2830 x86_64_zext_immediate_operand (op, mode)
2831 rtx op;
2832 enum machine_mode mode ATTRIBUTE_UNUSED;
2834 return x86_64_zero_extended_value (op);
2837 /* Return nonzero if OP is (const_int 1), else return zero. */
2840 const_int_1_operand (op, mode)
2841 rtx op;
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2844 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2847 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2848 reference and a constant. */
2851 symbolic_operand (op, mode)
2852 register rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2855 switch (GET_CODE (op))
2857 case SYMBOL_REF:
2858 case LABEL_REF:
2859 return 1;
2861 case CONST:
2862 op = XEXP (op, 0);
2863 if (GET_CODE (op) == SYMBOL_REF
2864 || GET_CODE (op) == LABEL_REF
2865 || (GET_CODE (op) == UNSPEC
2866 && (XINT (op, 1) == UNSPEC_GOT
2867 || XINT (op, 1) == UNSPEC_GOTOFF
2868 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2869 return 1;
2870 if (GET_CODE (op) != PLUS
2871 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2872 return 0;
2874 op = XEXP (op, 0);
2875 if (GET_CODE (op) == SYMBOL_REF
2876 || GET_CODE (op) == LABEL_REF)
2877 return 1;
2878 /* Only @GOTOFF gets offsets. */
2879 if (GET_CODE (op) != UNSPEC
2880 || XINT (op, 1) != UNSPEC_GOTOFF)
2881 return 0;
2883 op = XVECEXP (op, 0, 0);
2884 if (GET_CODE (op) == SYMBOL_REF
2885 || GET_CODE (op) == LABEL_REF)
2886 return 1;
2887 return 0;
2889 default:
2890 return 0;
2894 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2897 pic_symbolic_operand (op, mode)
2898 register rtx op;
2899 enum machine_mode mode ATTRIBUTE_UNUSED;
2901 if (GET_CODE (op) != CONST)
2902 return 0;
2903 op = XEXP (op, 0);
2904 if (TARGET_64BIT)
2906 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2907 return 1;
2909 else
2911 if (GET_CODE (op) == UNSPEC)
2912 return 1;
2913 if (GET_CODE (op) != PLUS
2914 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2915 return 0;
2916 op = XEXP (op, 0);
2917 if (GET_CODE (op) == UNSPEC)
2918 return 1;
2920 return 0;
2923 /* Return true if OP is a symbolic operand that resolves locally. */
2925 static int
2926 local_symbolic_operand (op, mode)
2927 rtx op;
2928 enum machine_mode mode ATTRIBUTE_UNUSED;
2930 if (GET_CODE (op) == LABEL_REF)
2931 return 1;
2933 if (GET_CODE (op) == CONST
2934 && GET_CODE (XEXP (op, 0)) == PLUS
2935 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2936 op = XEXP (XEXP (op, 0), 0);
2938 if (GET_CODE (op) != SYMBOL_REF)
2939 return 0;
2941 /* These we've been told are local by varasm and encode_section_info
2942 respectively. */
2943 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2944 return 1;
2946 /* There is, however, a not insubstantial body of code in the rest of
2947 the compiler that assumes it can just stick the results of
2948 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2949 /* ??? This is a hack. Should update the body of the compiler to
2950 always create a DECL and invoke targetm.encode_section_info. */
2951 if (strncmp (XSTR (op, 0), internal_label_prefix,
2952 internal_label_prefix_len) == 0)
2953 return 1;
2955 return 0;
2958 /* Test for a valid operand for a call instruction. Don't allow the
2959 arg pointer register or virtual regs since they may decay into
2960 reg + const, which the patterns can't handle. */
2963 call_insn_operand (op, mode)
2964 rtx op;
2965 enum machine_mode mode ATTRIBUTE_UNUSED;
2967 /* Disallow indirect through a virtual register. This leads to
2968 compiler aborts when trying to eliminate them. */
2969 if (GET_CODE (op) == REG
2970 && (op == arg_pointer_rtx
2971 || op == frame_pointer_rtx
2972 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2973 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2974 return 0;
2976 /* Disallow `call 1234'. Due to varying assembler lameness this
2977 gets either rejected or translated to `call .+1234'. */
2978 if (GET_CODE (op) == CONST_INT)
2979 return 0;
2981 /* Explicitly allow SYMBOL_REF even if pic. */
2982 if (GET_CODE (op) == SYMBOL_REF)
2983 return 1;
2985 /* Half-pic doesn't allow anything but registers and constants.
2986 We've just taken care of the latter. */
2987 if (HALF_PIC_P ())
2988 return register_operand (op, Pmode);
2990 /* Otherwise we can allow any general_operand in the address. */
2991 return general_operand (op, Pmode);
2995 constant_call_address_operand (op, mode)
2996 rtx op;
2997 enum machine_mode mode ATTRIBUTE_UNUSED;
2999 if (GET_CODE (op) == CONST
3000 && GET_CODE (XEXP (op, 0)) == PLUS
3001 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3002 op = XEXP (XEXP (op, 0), 0);
3003 return GET_CODE (op) == SYMBOL_REF;
3006 /* Match exactly zero and one. */
3009 const0_operand (op, mode)
3010 register rtx op;
3011 enum machine_mode mode;
3013 return op == CONST0_RTX (mode);
3017 const1_operand (op, mode)
3018 register rtx op;
3019 enum machine_mode mode ATTRIBUTE_UNUSED;
3021 return op == const1_rtx;
3024 /* Match 2, 4, or 8. Used for leal multiplicands. */
3027 const248_operand (op, mode)
3028 register rtx op;
3029 enum machine_mode mode ATTRIBUTE_UNUSED;
3031 return (GET_CODE (op) == CONST_INT
3032 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
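/* These are the scale factors valid in an i386 address; e.g.
   leal (%eax,%ebx,4), %ecx computes %eax + %ebx*4 (a scale of 1
   needs no explicit multiplicand and is handled elsewhere).  */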
3035 /* True if this is a constant appropriate for an increment or decrement. */
3038 incdec_operand (op, mode)
3039 register rtx op;
3040 enum machine_mode mode ATTRIBUTE_UNUSED;
3042 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3043 flags register, since the carry flag is not set. */
3044 if (TARGET_PENTIUM4 && !optimize_size)
3045 return 0;
3046 return op == const1_rtx || op == constm1_rtx;
3049 /* Return nonzero if OP is acceptable as an operand of the DImode shift
3050 expander. */
3053 shiftdi_operand (op, mode)
3054 rtx op;
3055 enum machine_mode mode ATTRIBUTE_UNUSED;
3057 if (TARGET_64BIT)
3058 return nonimmediate_operand (op, mode);
3059 else
3060 return register_operand (op, mode);
3063 /* Return false if this is the stack pointer, or any other fake
3064 register eliminable to the stack pointer. Otherwise, this is
3065 a register operand.
3067 This is used to prevent esp from being used as an index reg,
3068 which would only happen in pathological cases. */
3071 reg_no_sp_operand (op, mode)
3072 register rtx op;
3073 enum machine_mode mode;
3075 rtx t = op;
3076 if (GET_CODE (t) == SUBREG)
3077 t = SUBREG_REG (t);
3078 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3079 return 0;
3081 return register_operand (op, mode);
3085 mmx_reg_operand (op, mode)
3086 register rtx op;
3087 enum machine_mode mode ATTRIBUTE_UNUSED;
3089 return MMX_REG_P (op);
3092 /* Return false if this is any eliminable register. Otherwise
3093 general_operand. */
3096 general_no_elim_operand (op, mode)
3097 register rtx op;
3098 enum machine_mode mode;
3100 rtx t = op;
3101 if (GET_CODE (t) == SUBREG)
3102 t = SUBREG_REG (t);
3103 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3104 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3105 || t == virtual_stack_dynamic_rtx)
3106 return 0;
3107 if (REG_P (t)
3108 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3109 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3110 return 0;
3112 return general_operand (op, mode);
3115 /* Return false if this is any eliminable register. Otherwise
3116 register_operand or const_int. */
3119 nonmemory_no_elim_operand (op, mode)
3120 register rtx op;
3121 enum machine_mode mode;
3123 rtx t = op;
3124 if (GET_CODE (t) == SUBREG)
3125 t = SUBREG_REG (t);
3126 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3127 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3128 || t == virtual_stack_dynamic_rtx)
3129 return 0;
3131 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3134 /* Return true if op is a Q_REGS class register. */
3137 q_regs_operand (op, mode)
3138 register rtx op;
3139 enum machine_mode mode;
3141 if (mode != VOIDmode && GET_MODE (op) != mode)
3142 return 0;
3143 if (GET_CODE (op) == SUBREG)
3144 op = SUBREG_REG (op);
3145 return ANY_QI_REG_P (op);
3148 /* Return true if op is a NON_Q_REGS class register. */
3151 non_q_regs_operand (op, mode)
3152 register rtx op;
3153 enum machine_mode mode;
3155 if (mode != VOIDmode && GET_MODE (op) != mode)
3156 return 0;
3157 if (GET_CODE (op) == SUBREG)
3158 op = SUBREG_REG (op);
3159 return NON_QI_REG_P (op);
3162 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3163 insns. */
3165 sse_comparison_operator (op, mode)
3166 rtx op;
3167 enum machine_mode mode ATTRIBUTE_UNUSED;
3169 enum rtx_code code = GET_CODE (op);
3170 switch (code)
3172 /* Operations supported directly. */
3173 case EQ:
3174 case LT:
3175 case LE:
3176 case UNORDERED:
3177 case NE:
3178 case UNGE:
3179 case UNGT:
3180 case ORDERED:
3181 return 1;
3182 /* These are equivalent to ones above in non-IEEE comparisons. */
3183 case UNEQ:
3184 case UNLT:
3185 case UNLE:
3186 case LTGT:
3187 case GE:
3188 case GT:
3189 return !TARGET_IEEE_FP;
3190 default:
3191 return 0;
3194 /* Return 1 if OP is a valid comparison operator in valid mode. */
3196 ix86_comparison_operator (op, mode)
3197 register rtx op;
3198 enum machine_mode mode;
3200 enum machine_mode inmode;
3201 enum rtx_code code = GET_CODE (op);
3202 if (mode != VOIDmode && GET_MODE (op) != mode)
3203 return 0;
3204 if (GET_RTX_CLASS (code) != '<')
3205 return 0;
3206 inmode = GET_MODE (XEXP (op, 0));
3208 if (inmode == CCFPmode || inmode == CCFPUmode)
3210 enum rtx_code second_code, bypass_code;
3211 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3212 return (bypass_code == NIL && second_code == NIL);
3214 switch (code)
3216 case EQ: case NE:
3217 return 1;
3218 case LT: case GE:
3219 if (inmode == CCmode || inmode == CCGCmode
3220 || inmode == CCGOCmode || inmode == CCNOmode)
3221 return 1;
3222 return 0;
3223 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3224 if (inmode == CCmode)
3225 return 1;
3226 return 0;
3227 case GT: case LE:
3228 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3229 return 1;
3230 return 0;
3231 default:
3232 return 0;
3236 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3239 fcmov_comparison_operator (op, mode)
3240 register rtx op;
3241 enum machine_mode mode;
3243 enum machine_mode inmode;
3244 enum rtx_code code = GET_CODE (op);
3245 if (mode != VOIDmode && GET_MODE (op) != mode)
3246 return 0;
3247 if (GET_RTX_CLASS (code) != '<')
3248 return 0;
3249 inmode = GET_MODE (XEXP (op, 0));
3250 if (inmode == CCFPmode || inmode == CCFPUmode)
3252 enum rtx_code second_code, bypass_code;
3253 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3254 if (bypass_code != NIL || second_code != NIL)
3255 return 0;
3256 code = ix86_fp_compare_code_to_integer (code);
3258 /* The i387 supports just a limited set of condition codes. */
3259 switch (code)
3261 case LTU: case GTU: case LEU: case GEU:
3262 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3263 return 1;
3264 return 0;
3265 case ORDERED: case UNORDERED:
3266 case EQ: case NE:
3267 return 1;
3268 default:
3269 return 0;
3273 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3276 promotable_binary_operator (op, mode)
3277 register rtx op;
3278 enum machine_mode mode ATTRIBUTE_UNUSED;
3280 switch (GET_CODE (op))
3282 case MULT:
3283 /* Modern CPUs have the same latency for HImode and SImode multiply,
3284 but the 386 and 486 do HImode multiply faster. */
3285 return ix86_cpu > PROCESSOR_I486;
3286 case PLUS:
3287 case AND:
3288 case IOR:
3289 case XOR:
3290 case ASHIFT:
3291 return 1;
3292 default:
3293 return 0;
3297 /* Nearly general operand, but accept any const_double, since we wish
3298 to be able to drop them into memory rather than have them get pulled
3299 into registers. */
3302 cmp_fp_expander_operand (op, mode)
3303 register rtx op;
3304 enum machine_mode mode;
3306 if (mode != VOIDmode && mode != GET_MODE (op))
3307 return 0;
3308 if (GET_CODE (op) == CONST_DOUBLE)
3309 return 1;
3310 return general_operand (op, mode);
3313 /* Match an SImode or HImode register for a zero_extract. */
3316 ext_register_operand (op, mode)
3317 register rtx op;
3318 enum machine_mode mode ATTRIBUTE_UNUSED;
3320 int regno;
3321 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3322 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3323 return 0;
3325 if (!register_operand (op, VOIDmode))
3326 return 0;
3328 /* Be careful to accept only registers having upper parts. */
3329 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3330 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
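/* In GCC's i386 numbering, hard registers 0..3 are %eax, %edx, %ecx
   and %ebx, the only registers whose bits 8..15 are addressable as
   %ah, %dh, %ch and %bh; pseudos (regno > LAST_VIRTUAL_REGISTER) are
   also accepted since reload will choose a suitable hard register.  */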
3333 /* Return 1 if this is a valid binary floating-point operation.
3334 OP is the expression matched, and MODE is its mode. */
3337 binary_fp_operator (op, mode)
3338 register rtx op;
3339 enum machine_mode mode;
3341 if (mode != VOIDmode && mode != GET_MODE (op))
3342 return 0;
3344 switch (GET_CODE (op))
3346 case PLUS:
3347 case MINUS:
3348 case MULT:
3349 case DIV:
3350 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3352 default:
3353 return 0;
3358 mult_operator (op, mode)
3359 register rtx op;
3360 enum machine_mode mode ATTRIBUTE_UNUSED;
3362 return GET_CODE (op) == MULT;
3366 div_operator (op, mode)
3367 register rtx op;
3368 enum machine_mode mode ATTRIBUTE_UNUSED;
3370 return GET_CODE (op) == DIV;
3374 arith_or_logical_operator (op, mode)
3375 rtx op;
3376 enum machine_mode mode;
3378 return ((mode == VOIDmode || GET_MODE (op) == mode)
3379 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3380 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3383 /* Returns 1 if OP is memory operand with a displacement. */
3386 memory_displacement_operand (op, mode)
3387 register rtx op;
3388 enum machine_mode mode;
3390 struct ix86_address parts;
3392 if (! memory_operand (op, mode))
3393 return 0;
3395 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3396 abort ();
3398 return parts.disp != NULL_RTX;
3401 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3402 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3404 ??? It seems likely that this will only work because cmpsi is an
3405 expander, and no actual insns use this. */
3408 cmpsi_operand (op, mode)
3409 rtx op;
3410 enum machine_mode mode;
3412 if (nonimmediate_operand (op, mode))
3413 return 1;
3415 if (GET_CODE (op) == AND
3416 && GET_MODE (op) == SImode
3417 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3418 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3419 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3420 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3421 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3422 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3423 return 1;
3425 return 0;
3428 /* Returns 1 if OP is a memory operand that cannot be represented by the
3429 modRM array. */
3432 long_memory_operand (op, mode)
3433 register rtx op;
3434 enum machine_mode mode;
3436 if (! memory_operand (op, mode))
3437 return 0;
3439 return memory_address_length (op) != 0;
3442 /* Return nonzero if the rtx is known aligned. */
3445 aligned_operand (op, mode)
3446 rtx op;
3447 enum machine_mode mode;
3449 struct ix86_address parts;
3451 if (!general_operand (op, mode))
3452 return 0;
3454 /* Registers and immediate operands are always "aligned". */
3455 if (GET_CODE (op) != MEM)
3456 return 1;
3458 /* Don't even try to do any aligned optimizations with volatiles. */
3459 if (MEM_VOLATILE_P (op))
3460 return 0;
3462 op = XEXP (op, 0);
3464 /* Pushes and pops are only valid on the stack pointer. */
3465 if (GET_CODE (op) == PRE_DEC
3466 || GET_CODE (op) == POST_INC)
3467 return 1;
3469 /* Decode the address. */
3470 if (! ix86_decompose_address (op, &parts))
3471 abort ();
3473 if (parts.base && GET_CODE (parts.base) == SUBREG)
3474 parts.base = SUBREG_REG (parts.base);
3475 if (parts.index && GET_CODE (parts.index) == SUBREG)
3476 parts.index = SUBREG_REG (parts.index);
3478 /* Look for some component that isn't known to be aligned. */
3479 if (parts.index)
3481 if (parts.scale < 4
3482 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3483 return 0;
3485 if (parts.base)
3487 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3488 return 0;
3490 if (parts.disp)
3492 if (GET_CODE (parts.disp) != CONST_INT
3493 || (INTVAL (parts.disp) & 3) != 0)
3494 return 0;
3497 /* Didn't find one -- this must be an aligned address. */
3498 return 1;
3501 /* Return true if the constant is something that can be loaded with
3502 a special instruction. Only handle 0.0 and 1.0; others are less
3503 worthwhile. */
3506 standard_80387_constant_p (x)
3507 rtx x;
3509 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3510 return -1;
3511 /* Note that there are other constants on the 80387, such as pi, that we should
3512 support too. On some machines, these are much slower to load as a standard
3513 constant than to load from doubles in memory. */
3514 if (x == CONST0_RTX (GET_MODE (x)))
3515 return 1;
3516 if (x == CONST1_RTX (GET_MODE (x)))
3517 return 2;
3518 return 0;
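/* The return values 1 and 2 let the move patterns emit fldz (+0.0)
   and fld1 (+1.0) respectively instead of a memory load.  */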
3521 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.  */
3524 standard_sse_constant_p (x)
3525 rtx x;
3527 if (GET_CODE (x) != CONST_DOUBLE)
3528 return -1;
3529 return (x == CONST0_RTX (GET_MODE (x)));
3532 /* Returns 1 if OP contains a symbol reference */
3535 symbolic_reference_mentioned_p (op)
3536 rtx op;
3538 register const char *fmt;
3539 register int i;
3541 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3542 return 1;
3544 fmt = GET_RTX_FORMAT (GET_CODE (op));
3545 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3547 if (fmt[i] == 'E')
3549 register int j;
3551 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3552 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3553 return 1;
3556 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3557 return 1;
3560 return 0;
3563 /* Return 1 if it is appropriate to emit `ret' instructions in the
3564 body of a function. Do this only if the epilogue is simple, needing a
3565 couple of insns. Prior to reloading, we can't tell how many registers
3566 must be saved, so return 0 then. Return 0 if there is no frame
3567 marker to de-allocate.
3569 If NON_SAVING_SETJMP is defined and true, then it is not possible
3570 for the epilogue to be simple, so return 0. This is a special case
3571 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3572 until final, but jump_optimize may need to know sooner if a
3573 `return' is OK. */
3576 ix86_can_use_return_insn_p ()
3578 struct ix86_frame frame;
3580 #ifdef NON_SAVING_SETJMP
3581 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3582 return 0;
3583 #endif
3585 if (! reload_completed || frame_pointer_needed)
3586 return 0;
3588 /* Don't allow more than 32k of pop, since that's all we can do
3589 with one instruction. */
3590 if (current_function_pops_args
3591 && current_function_args_size >= 32768)
3592 return 0;
3594 ix86_compute_frame_layout (&frame);
3595 return frame.to_allocate == 0 && frame.nregs == 0;
3598 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3600 x86_64_sign_extended_value (value)
3601 rtx value;
3603 switch (GET_CODE (value))
3605 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3606 to be at least 32 and thus all acceptable constants are
3607 represented as CONST_INT. */
3608 case CONST_INT:
3609 if (HOST_BITS_PER_WIDE_INT == 32)
3610 return 1;
3611 else
3613 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3614 return trunc_int_for_mode (val, SImode) == val;
3616 break;
3618 /* For certain code models, the symbolic references are known to fit. */
3619 case SYMBOL_REF:
3620 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3622 /* For certain code models, the code is near as well. */
3623 case LABEL_REF:
3624 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3626 /* We also may accept the offsetted memory references in certain special
3627 cases. */
3628 case CONST:
3629 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3630 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3631 return 1;
3632 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3634 rtx op1 = XEXP (XEXP (value, 0), 0);
3635 rtx op2 = XEXP (XEXP (value, 0), 1);
3636 HOST_WIDE_INT offset;
3638 if (ix86_cmodel == CM_LARGE)
3639 return 0;
3640 if (GET_CODE (op2) != CONST_INT)
3641 return 0;
3642 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3643 switch (GET_CODE (op1))
3645 case SYMBOL_REF:
3646 /* For CM_SMALL assume that the latest object is 1MB before the
3647 end of the 31-bit boundary. We may also accept pretty
3648 large negative constants, knowing that all objects are
3649 in the positive half of the address space. */
3650 if (ix86_cmodel == CM_SMALL
3651 && offset < 1024*1024*1024
3652 && trunc_int_for_mode (offset, SImode) == offset)
3653 return 1;
3654 /* For CM_KERNEL we know that all objects reside in the
3655 negative half of the 32-bit address space. We may not
3656 accept negative offsets, since they may push an address
3657 out of range, but we may accept pretty large positive ones. */
3658 if (ix86_cmodel == CM_KERNEL
3659 && offset > 0
3660 && trunc_int_for_mode (offset, SImode) == offset)
3661 return 1;
3662 break;
3663 case LABEL_REF:
3664 /* These conditions are similar to SYMBOL_REF ones, just the
3665 constraints for code models differ. */
3666 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3667 && offset < 1024*1024*1024
3668 && trunc_int_for_mode (offset, SImode) == offset)
3669 return 1;
3670 if (ix86_cmodel == CM_KERNEL
3671 && offset > 0
3672 && trunc_int_for_mode (offset, SImode) == offset)
3673 return 1;
3674 break;
3675 default:
3676 return 0;
3679 return 0;
3680 default:
3681 return 0;
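/* For example, (const_int 0x7fffffff) and (const_int -0x80000000)
   both fit a sign-extended 32-bit immediate, while
   (const_int 0x80000000) does not on a 64-bit host.  */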
3685 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3687 x86_64_zero_extended_value (value)
3688 rtx value;
3690 switch (GET_CODE (value))
3692 case CONST_DOUBLE:
3693 if (HOST_BITS_PER_WIDE_INT == 32)
3694 return (GET_MODE (value) == VOIDmode
3695 && !CONST_DOUBLE_HIGH (value));
3696 else
3697 return 0;
3698 case CONST_INT:
3699 if (HOST_BITS_PER_WIDE_INT == 32)
3700 return INTVAL (value) >= 0;
3701 else
3702 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3703 break;
3705 /* For certain code models, the symbolic references are known to fit. */
3706 case SYMBOL_REF:
3707 return ix86_cmodel == CM_SMALL;
3709 /* For certain code models, the code is near as well. */
3710 case LABEL_REF:
3711 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3713 /* We may also accept offsetted memory references in certain special
3714 cases. */
3715 case CONST:
3716 if (GET_CODE (XEXP (value, 0)) == PLUS)
3718 rtx op1 = XEXP (XEXP (value, 0), 0);
3719 rtx op2 = XEXP (XEXP (value, 0), 1);
3721 if (ix86_cmodel == CM_LARGE)
3722 return 0;
3723 switch (GET_CODE (op1))
3725 case SYMBOL_REF:
3726 return 0;
3727 /* For the small code model we may accept pretty large positive
3728 offsets, since one bit is available for free. Negative
3729 offsets are limited by the size of the NULL pointer area
3730 specified by the ABI. */
3731 if (ix86_cmodel == CM_SMALL
3732 && GET_CODE (op2) == CONST_INT
3733 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3734 && (trunc_int_for_mode (INTVAL (op2), SImode)
3735 == INTVAL (op2)))
3736 return 1;
3737 /* ??? For the kernel, we may accept an adjustment of
3738 -0x10000000, since we know that it will just convert the
3739 negative address space to positive, but perhaps this
3740 is not worthwhile. */
3741 break;
3742 case LABEL_REF:
3743 /* These conditions are similar to SYMBOL_REF ones, just the
3744 constraints for code models differ. */
3745 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3746 && GET_CODE (op2) == CONST_INT
3747 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3748 && (trunc_int_for_mode (INTVAL (op2), SImode)
3749 == INTVAL (op2)))
3750 return 1;
3751 break;
3752 default:
3753 return 0;
3756 return 0;
3757 default:
3758 return 0;
3762 /* Value should be nonzero if functions must have frame pointers.
3763 Zero means the frame pointer need not be set up (and parms may
3764 be accessed via the stack pointer) in functions that seem suitable. */
3767 ix86_frame_pointer_required ()
3769 /* If we accessed previous frames, then the generated code expects
3770 to be able to access the saved ebp value in our frame. */
3771 if (cfun->machine->accesses_prev_frame)
3772 return 1;
3774 /* Several x86 OSes need a frame pointer for other reasons,
3775 usually pertaining to setjmp. */
3776 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3777 return 1;
3779 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3780 the frame pointer by default. Turn it back on now if we've not
3781 got a leaf function. */
3782 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3783 return 1;
3785 return 0;
3788 /* Record that the current function accesses previous call frames. */
3790 void
3791 ix86_setup_frame_addresses ()
3793 cfun->machine->accesses_prev_frame = 1;
3796 static char pic_label_name[32];
3798 /* This function generates code for -fpic that loads %ebx with
3799 the return address of the caller and then returns. */
3801 void
3802 ix86_asm_file_end (file)
3803 FILE *file;
3805 rtx xops[2];
3807 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3808 return;
3810 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug when
3811 updating relocations against a section being discarded, such that this
3812 doesn't work. We ought to detect this at configure time. */
3813 #if 0
3814 /* The trick here is to create a linkonce section containing the
3815 pic label thunk, but to refer to it with an internal label.
3816 Because the label is internal, we don't have inter-dso name
3817 binding issues on hosts that don't support ".hidden".
3819 In order to use these macros, however, we must create a fake
3820 function decl. */
3821 if (targetm.have_named_sections)
3823 tree decl = build_decl (FUNCTION_DECL,
3824 get_identifier ("i686.get_pc_thunk"),
3825 error_mark_node);
3826 DECL_ONE_ONLY (decl) = 1;
3827 (*targetm.asm_out.unique_section) (decl, 0);
3828 named_section (decl, NULL);
3830 else
3831 #else
3832 text_section ();
3833 #endif
3835 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3836 internal (non-global) label that's being emitted, it didn't make
3837 sense to have .type information for local labels. This caused
3838 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3839 me debug info for a label that you're declaring non-global?), so this
3840 was changed to call ASM_OUTPUT_LABEL() instead. */
3842 ASM_OUTPUT_LABEL (file, pic_label_name);
3844 xops[0] = pic_offset_table_rtx;
3845 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3846 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3847 output_asm_insn ("ret", xops);
3850 void
3851 load_pic_register ()
3853 rtx gotsym, pclab;
3855 if (TARGET_64BIT)
3856 abort ();
3858 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3860 if (TARGET_DEEP_BRANCH_PREDICTION)
3862 if (! pic_label_name[0])
3863 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3864 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3866 else
3868 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3871 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3873 if (! TARGET_DEEP_BRANCH_PREDICTION)
3874 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3876 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3879 /* Generate a "push" pattern for input ARG. */
3881 static rtx
3882 gen_push (arg)
3883 rtx arg;
3885 return gen_rtx_SET (VOIDmode,
3886 gen_rtx_MEM (Pmode,
3887 gen_rtx_PRE_DEC (Pmode,
3888 stack_pointer_rtx)),
3889 arg);
3892 /* Return 1 if we need to save REGNO. */
3893 static int
3894 ix86_save_reg (regno, maybe_eh_return)
3895 unsigned int regno;
3896 int maybe_eh_return;
3898 if (regno == PIC_OFFSET_TABLE_REGNUM
3899 && (current_function_uses_pic_offset_table
3900 || current_function_uses_const_pool
3901 || current_function_calls_eh_return))
3902 return 1;
3904 if (current_function_calls_eh_return && maybe_eh_return)
3906 unsigned i;
3907 for (i = 0; ; i++)
3909 unsigned test = EH_RETURN_DATA_REGNO (i);
3910 if (test == INVALID_REGNUM)
3911 break;
3912 if (test == regno)
3913 return 1;
3917 return (regs_ever_live[regno]
3918 && !call_used_regs[regno]
3919 && !fixed_regs[regno]
3920 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3923 /* Return number of registers to be saved on the stack. */
3925 static int
3926 ix86_nsaved_regs ()
3928 int nregs = 0;
3929 int regno;
3931 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3932 if (ix86_save_reg (regno, true))
3933 nregs++;
3934 return nregs;
3937 /* Return the offset between two registers, one to be eliminated, and the other
3938 its replacement, at the start of a routine. */
3940 HOST_WIDE_INT
3941 ix86_initial_elimination_offset (from, to)
3942 int from;
3943 int to;
3945 struct ix86_frame frame;
3946 ix86_compute_frame_layout (&frame);
3948 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3949 return frame.hard_frame_pointer_offset;
3950 else if (from == FRAME_POINTER_REGNUM
3951 && to == HARD_FRAME_POINTER_REGNUM)
3952 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3953 else
3955 if (to != STACK_POINTER_REGNUM)
3956 abort ();
3957 else if (from == ARG_POINTER_REGNUM)
3958 return frame.stack_pointer_offset;
3959 else if (from != FRAME_POINTER_REGNUM)
3960 abort ();
3961 else
3962 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3966 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
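/* A rough sketch of the layout computed below, starting from the incoming
   return address (offsets grow downward into the frame):

	return address				(offset 0)
	saved %ebp, if frame pointer used
						<- hard_frame_pointer_offset
	register save area			(nregs * UNITS_PER_WORD)
	va-arg register save area		(64-bit varargs functions only)
	padding1				(aligns the locals)
						<- frame_pointer_offset
	local variables				(get_frame_size () bytes)
	outgoing arguments			(if ACCUMULATE_OUTGOING_ARGS)
	padding2				(aligns to preferred boundary)
						<- stack_pointer_offset  */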
3968 static void
3969 ix86_compute_frame_layout (frame)
3970 struct ix86_frame *frame;
3972 HOST_WIDE_INT total_size;
3973 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3974 int offset;
3975 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3976 HOST_WIDE_INT size = get_frame_size ();
3978 frame->nregs = ix86_nsaved_regs ();
3979 total_size = size;
3981 /* Skip return address and saved base pointer. */
3982 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3984 frame->hard_frame_pointer_offset = offset;
3986 /* Do some sanity checking of stack_alignment_needed and
3987 preferred_alignment, since the i386 port is the only one using
3988 these features, and they may break easily. */
3990 if (size && !stack_alignment_needed)
3991 abort ();
3992 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3993 abort ();
3994 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3995 abort ();
3996 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3997 abort ();
3999 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4000 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4002 /* Register save area */
4003 offset += frame->nregs * UNITS_PER_WORD;
4005 /* Va-arg area */
4006 if (ix86_save_varrargs_registers)
4008 offset += X86_64_VARARGS_SIZE;
4009 frame->va_arg_size = X86_64_VARARGS_SIZE;
4011 else
4012 frame->va_arg_size = 0;
4014 /* Align start of frame for local function. */
4015 frame->padding1 = ((offset + stack_alignment_needed - 1)
4016 & -stack_alignment_needed) - offset;
4018 offset += frame->padding1;
4020 /* Frame pointer points here. */
4021 frame->frame_pointer_offset = offset;
4023 offset += size;
4025 /* Add outgoing arguments area. */
4026 if (ACCUMULATE_OUTGOING_ARGS)
4028 offset += current_function_outgoing_args_size;
4029 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4031 else
4032 frame->outgoing_arguments_size = 0;
4034 /* Align stack boundary. */
4035 frame->padding2 = ((offset + preferred_alignment - 1)
4036 & -preferred_alignment) - offset;
4038 offset += frame->padding2;
4040 /* We've reached the end of the stack frame. */
4041 frame->stack_pointer_offset = offset;
4043 /* Size the prologue needs to allocate. */
4044 frame->to_allocate =
4045 (size + frame->padding1 + frame->padding2
4046 + frame->outgoing_arguments_size + frame->va_arg_size);
4048 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4049 && current_function_is_leaf)
4051 frame->red_zone_size = frame->to_allocate;
4052 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4053 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4055 else
4056 frame->red_zone_size = 0;
4057 frame->to_allocate -= frame->red_zone_size;
4058 frame->stack_pointer_offset -= frame->red_zone_size;
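/* E.g. a 64-bit leaf function with 40 bytes of locals can keep them
   entirely in the red zone below %rsp (RED_ZONE_SIZE bytes minus the
   reserve), so to_allocate drops to 0 and the prologue adjusts nothing.  */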
4059 #if 0
4060 fprintf (stderr, "nregs: %i\n", frame->nregs);
4061 fprintf (stderr, "size: %i\n", size);
4062 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4063 fprintf (stderr, "padding1: %i\n", frame->padding1);
4064 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4065 fprintf (stderr, "padding2: %i\n", frame->padding2);
4066 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4067 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4068 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4069 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4070 frame->hard_frame_pointer_offset);
4071 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4072 #endif
4075 /* Emit code to save registers in the prologue. */
4077 static void
4078 ix86_emit_save_regs ()
4080 register int regno;
4081 rtx insn;
4083 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4084 if (ix86_save_reg (regno, true))
4086 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4087 RTX_FRAME_RELATED_P (insn) = 1;
4091 /* Emit code to save registers using MOV insns. First register
4092 is saved at POINTER + OFFSET. */
4093 static void
4094 ix86_emit_save_regs_using_mov (pointer, offset)
4095 rtx pointer;
4096 HOST_WIDE_INT offset;
4098 int regno;
4099 rtx insn;
4101 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4102 if (ix86_save_reg (regno, true))
4104 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4105 Pmode, offset),
4106 gen_rtx_REG (Pmode, regno));
4107 RTX_FRAME_RELATED_P (insn) = 1;
4108 offset += UNITS_PER_WORD;
4112 /* Expand the prologue into a bunch of separate insns. */
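/* For illustration, a typical 32-bit sequence this expands to, for a
   function that needs a frame pointer, saves %ebx and allocates N bytes
   (N being whatever frame.to_allocate comes to), is:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp  */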
4114 void
4115 ix86_expand_prologue ()
4117 rtx insn;
4118 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4119 || current_function_uses_const_pool)
4120 && !TARGET_64BIT);
4121 struct ix86_frame frame;
4122 int use_mov = 0;
4123 HOST_WIDE_INT allocate;
4125 if (!optimize_size)
4127 use_fast_prologue_epilogue
4128 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4129 if (TARGET_PROLOGUE_USING_MOVE)
4130 use_mov = use_fast_prologue_epilogue;
4132 ix86_compute_frame_layout (&frame);
4134 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4135 slower on all targets. Also sdb doesn't like it. */
4137 if (frame_pointer_needed)
4139 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4140 RTX_FRAME_RELATED_P (insn) = 1;
4142 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4143 RTX_FRAME_RELATED_P (insn) = 1;
4146 allocate = frame.to_allocate;
4147 /* In case we are dealing with only a single register and an empty frame,
4148 a push is equivalent to the mov+add sequence. */
4149 if (allocate == 0 && frame.nregs <= 1)
4150 use_mov = 0;
4152 if (!use_mov)
4153 ix86_emit_save_regs ();
4154 else
4155 allocate += frame.nregs * UNITS_PER_WORD;
4157 if (allocate == 0)
4159 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4161 insn = emit_insn (gen_pro_epilogue_adjust_stack
4162 (stack_pointer_rtx, stack_pointer_rtx,
4163 GEN_INT (-allocate)));
4164 RTX_FRAME_RELATED_P (insn) = 1;
4166 else
4168 /* ??? Is this only valid for Win32? */
4170 rtx arg0, sym;
4172 if (TARGET_64BIT)
4173 abort ();
4175 arg0 = gen_rtx_REG (SImode, 0);
4176 emit_move_insn (arg0, GEN_INT (allocate));
4178 sym = gen_rtx_MEM (FUNCTION_MODE,
4179 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4180 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4182 CALL_INSN_FUNCTION_USAGE (insn)
4183 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4184 CALL_INSN_FUNCTION_USAGE (insn));
4186 if (use_mov)
4188 if (!frame_pointer_needed || !frame.to_allocate)
4189 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4190 else
4191 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4192 -frame.nregs * UNITS_PER_WORD);
4195 #ifdef SUBTARGET_PROLOGUE
4196 SUBTARGET_PROLOGUE;
4197 #endif
4199 if (pic_reg_used)
4200 load_pic_register ();
4202 /* If we are profiling, make sure no instructions are scheduled before
4203 the call to mcount. However, if -fpic, the above call will have
4204 done that. */
4205 if (current_function_profile && ! pic_reg_used)
4206 emit_insn (gen_blockage ());
4209 /* Emit code to restore saved registers using MOV insns. First register
4210 is restored from POINTER + OFFSET. */
4211 static void
4212 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4213 rtx pointer;
4214 int offset;
4215 int maybe_eh_return;
4217 int regno;
4219 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4220 if (ix86_save_reg (regno, maybe_eh_return))
4222 emit_move_insn (gen_rtx_REG (Pmode, regno),
4223 adjust_address (gen_rtx_MEM (Pmode, pointer),
4224 Pmode, offset));
4225 offset += UNITS_PER_WORD;
4229 /* Restore function stack, frame, and registers. */
4231 void
4232 ix86_expand_epilogue (style)
4233 int style;
4235 int regno;
4236 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4237 struct ix86_frame frame;
4238 HOST_WIDE_INT offset;
4240 ix86_compute_frame_layout (&frame);
4242 /* Calculate the start of the saved registers relative to ebp. Special care
4243 must be taken for the normal return case of a function using
4244 eh_return: the eax and edx registers are marked as saved, but not
4245 restored along this path. */
4246 offset = frame.nregs;
4247 if (current_function_calls_eh_return && style != 2)
4248 offset -= 2;
4249 offset *= -UNITS_PER_WORD;
4251 /* If we're only restoring one register and sp is not valid, then
4252 use a move instruction to restore the register, since it's
4253 less work than reloading sp and popping the register.
4255 The default code results in a stack adjustment using an add/lea
4256 instruction, while this code results in a LEAVE instruction (or discrete
4257 equivalent), so it is profitable in some other cases as well, especially
4258 when there are no registers to restore. We also use this code when
4259 TARGET_USE_LEAVE and there is exactly one register to pop. This heuristic
4260 may need some tuning in the future. */
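/* E.g. with a frame pointer, TARGET_USE_LEAVE and a single saved
   register, this path can emit just

	movl	-4(%ebp), %ebx
	leave

   instead of readjusting %esp and popping the register.  */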
4261 if ((!sp_valid && frame.nregs <= 1)
4262 || (TARGET_EPILOGUE_USING_MOVE
4263 && use_fast_prologue_epilogue
4264 && (frame.nregs > 1 || frame.to_allocate))
4265 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4266 || (frame_pointer_needed && TARGET_USE_LEAVE
4267 && use_fast_prologue_epilogue && frame.nregs == 1)
4268 || current_function_calls_eh_return)
4270 /* Restore registers. We can use ebp or esp to address the memory
4271 locations. If both are available, default to ebp, since offsets
4272 are known to be small. The only exception is esp pointing directly
4273 to the end of the block of saved registers, where we may simplify
4274 the addressing mode. */
4276 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4277 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4278 frame.to_allocate, style == 2);
4279 else
4280 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4281 offset, style == 2);
4283 /* eh_return epilogues need %ecx added to the stack pointer. */
4284 if (style == 2)
4286 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4288 if (frame_pointer_needed)
4290 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4291 tmp = plus_constant (tmp, UNITS_PER_WORD);
4292 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4294 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4295 emit_move_insn (hard_frame_pointer_rtx, tmp);
4297 emit_insn (gen_pro_epilogue_adjust_stack
4298 (stack_pointer_rtx, sa, const0_rtx));
4300 else
4302 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4303 tmp = plus_constant (tmp, (frame.to_allocate
4304 + frame.nregs * UNITS_PER_WORD));
4305 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4308 else if (!frame_pointer_needed)
4309 emit_insn (gen_pro_epilogue_adjust_stack
4310 (stack_pointer_rtx, stack_pointer_rtx,
4311 GEN_INT (frame.to_allocate
4312 + frame.nregs * UNITS_PER_WORD)));
4313 /* If not an i386, mov & pop is faster than "leave". */
4314 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4315 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4316 else
4318 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4319 hard_frame_pointer_rtx,
4320 const0_rtx));
4321 if (TARGET_64BIT)
4322 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4323 else
4324 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4327 else
4329 /* First step is to deallocate the stack frame so that we can
4330 pop the registers. */
4331 if (!sp_valid)
4333 if (!frame_pointer_needed)
4334 abort ();
4335 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4336 hard_frame_pointer_rtx,
4337 GEN_INT (offset)));
4339 else if (frame.to_allocate)
4340 emit_insn (gen_pro_epilogue_adjust_stack
4341 (stack_pointer_rtx, stack_pointer_rtx,
4342 GEN_INT (frame.to_allocate)));
4344 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4345 if (ix86_save_reg (regno, false))
4347 if (TARGET_64BIT)
4348 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4349 else
4350 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4352 if (frame_pointer_needed)
4354 /* Leave results in shorter dependency chains on CPUs that are
4355 able to grok it fast. */
4356 if (TARGET_USE_LEAVE)
4357 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4358 else if (TARGET_64BIT)
4359 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4360 else
4361 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4365 /* Sibcall epilogues don't want a return instruction. */
4366 if (style == 0)
4367 return;
4369 if (current_function_pops_args && current_function_args_size)
4371 rtx popc = GEN_INT (current_function_pops_args);
4373 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4374 return address, do an explicit add, and jump indirectly to the
4375 caller. */
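/* E.g. "ret $60000" is directly encodable, but to pop 100000 bytes the
   code below emits popl %ecx; addl $100000, %esp; jmp *%ecx instead.  */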
4377 if (current_function_pops_args >= 65536)
4379 rtx ecx = gen_rtx_REG (SImode, 2);
4381 /* There is no "pascal" calling convention in the 64-bit ABI. */
4382 if (TARGET_64BIT)
4383 abort ();
4385 emit_insn (gen_popsi1 (ecx));
4386 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4387 emit_jump_insn (gen_return_indirect_internal (ecx));
4389 else
4390 emit_jump_insn (gen_return_pop_internal (popc));
4392 else
4393 emit_jump_insn (gen_return_internal ());
4396 /* Extract the parts of an RTL expression that is a valid memory address
4397 for an instruction. Return 0 if the structure of the address is
4398 grossly off. Return -1 if the address contains an ASHIFT, so it is not
4399 strictly valid, but is still used for computing the length of an lea instruction.
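/* For example, (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
   (const_int 12)) decomposes into base %ebx, index %eax, scale 4 and
   disp 12, i.e. the address 12(%ebx,%eax,4).  */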
4402 static int
4403 ix86_decompose_address (addr, out)
4404 register rtx addr;
4405 struct ix86_address *out;
4407 rtx base = NULL_RTX;
4408 rtx index = NULL_RTX;
4409 rtx disp = NULL_RTX;
4410 HOST_WIDE_INT scale = 1;
4411 rtx scale_rtx = NULL_RTX;
4412 int retval = 1;
4414 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4415 base = addr;
4416 else if (GET_CODE (addr) == PLUS)
4418 rtx op0 = XEXP (addr, 0);
4419 rtx op1 = XEXP (addr, 1);
4420 enum rtx_code code0 = GET_CODE (op0);
4421 enum rtx_code code1 = GET_CODE (op1);
4423 if (code0 == REG || code0 == SUBREG)
4425 if (code1 == REG || code1 == SUBREG)
4426 index = op0, base = op1; /* index + base */
4427 else
4428 base = op0, disp = op1; /* base + displacement */
4430 else if (code0 == MULT)
4432 index = XEXP (op0, 0);
4433 scale_rtx = XEXP (op0, 1);
4434 if (code1 == REG || code1 == SUBREG)
4435 base = op1; /* index*scale + base */
4436 else
4437 disp = op1; /* index*scale + disp */
4439 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4441 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4442 scale_rtx = XEXP (XEXP (op0, 0), 1);
4443 base = XEXP (op0, 1);
4444 disp = op1;
4446 else if (code0 == PLUS)
4448 index = XEXP (op0, 0); /* index + base + disp */
4449 base = XEXP (op0, 1);
4450 disp = op1;
4452 else
4453 return 0;
4455 else if (GET_CODE (addr) == MULT)
4457 index = XEXP (addr, 0); /* index*scale */
4458 scale_rtx = XEXP (addr, 1);
4460 else if (GET_CODE (addr) == ASHIFT)
4462 rtx tmp;
4464 /* We're called for lea too, which implements ashift on occasion. */
4465 index = XEXP (addr, 0);
4466 tmp = XEXP (addr, 1);
4467 if (GET_CODE (tmp) != CONST_INT)
4468 return 0;
4469 scale = INTVAL (tmp);
4470 if ((unsigned HOST_WIDE_INT) scale > 3)
4471 return 0;
4472 scale = 1 << scale;
4473 retval = -1;
4475 else
4476 disp = addr; /* displacement */
4478 /* Extract the integral value of scale. */
4479 if (scale_rtx)
4481 if (GET_CODE (scale_rtx) != CONST_INT)
4482 return 0;
4483 scale = INTVAL (scale_rtx);
4486 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4487 if (base && index && scale == 1
4488 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4489 || index == stack_pointer_rtx))
4491 rtx tmp = base;
4492 base = index;
4493 index = tmp;
4496 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4497 if ((base == hard_frame_pointer_rtx
4498 || base == frame_pointer_rtx
4499 || base == arg_pointer_rtx) && !disp)
4500 disp = const0_rtx;
4502 /* Special case: on K6, [%esi] causes the instruction to be vector
4503 decoded. Avoid this by transforming to [%esi+0]. */
4504 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4505 && base && !index && !disp
4506 && REG_P (base)
4507 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4508 disp = const0_rtx;
4510 /* Special case: encode reg+reg instead of reg*2. */
4511 if (!base && index && scale && scale == 2)
4512 base = index, scale = 1;
4514 /* Special case: scaling cannot be encoded without base or displacement. */
4515 if (!base && !disp && index && scale != 1)
4516 disp = const0_rtx;
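/* E.g. a bare [%eax*4] must be emitted as 0(,%eax,4), and [%eax*2] is
   cheaper encoded as (%eax,%eax); the fixups above arrange for both.  */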
4518 out->base = base;
4519 out->index = index;
4520 out->disp = disp;
4521 out->scale = scale;
4523 return retval;
4526 /* Return the cost of the memory address x.
4527 For i386, it is better to use a complex address than let gcc copy
4528 the address into a reg and make a new pseudo. But not if the address
4529 requires two regs - that would mean more pseudos with longer
4530 lifetimes. */
4532 ix86_address_cost (x)
4533 rtx x;
4535 struct ix86_address parts;
4536 int cost = 1;
4538 if (!ix86_decompose_address (x, &parts))
4539 abort ();
4541 if (parts.base && GET_CODE (parts.base) == SUBREG)
4542 parts.base = SUBREG_REG (parts.base);
4543 if (parts.index && GET_CODE (parts.index) == SUBREG)
4544 parts.index = SUBREG_REG (parts.index);
4546 /* More complex memory references are better. */
4547 if (parts.disp && parts.disp != const0_rtx)
4548 cost--;
4550 /* Attempt to minimize number of registers in the address. */
4551 if ((parts.base
4552 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4553 || (parts.index
4554 && (!REG_P (parts.index)
4555 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4556 cost++;
4558 if (parts.base
4559 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4560 && parts.index
4561 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4562 && parts.base != parts.index)
4563 cost++;
4565 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4566 since its predecode logic can't detect the length of such instructions
4567 and decoding degenerates to vector decoded. Increase the cost of such
4568 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4569 to split such addresses or even refuse such addresses at all.
4571 The following addressing modes are affected:
4572 [base+scale*index]
4573 [scale*index+disp]
4574 [base+index]
4576 The first and last cases may be avoidable by explicitly coding the zero in
4577 the memory address, but I don't have an AMD K6 machine handy to check this
4578 theory. */
4580 if (TARGET_K6
4581 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4582 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4583 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4584 cost += 10;
4586 return cost;
4589 /* If X is a machine specific address (i.e. a symbol or label being
4590 referenced as a displacement from the GOT implemented using an
4591 UNSPEC), then return the base term. Otherwise return X. */
4594 ix86_find_base_term (x)
4595 rtx x;
4597 rtx term;
4599 if (TARGET_64BIT)
4601 if (GET_CODE (x) != CONST)
4602 return x;
4603 term = XEXP (x, 0);
4604 if (GET_CODE (term) == PLUS
4605 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4606 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4607 term = XEXP (term, 0);
4608 if (GET_CODE (term) != UNSPEC
4609 || XINT (term, 1) != UNSPEC_GOTPCREL)
4610 return x;
4612 term = XVECEXP (term, 0, 0);
4614 if (GET_CODE (term) != SYMBOL_REF
4615 && GET_CODE (term) != LABEL_REF)
4616 return x;
4618 return term;
4621 if (GET_CODE (x) != PLUS
4622 || XEXP (x, 0) != pic_offset_table_rtx
4623 || GET_CODE (XEXP (x, 1)) != CONST)
4624 return x;
4626 term = XEXP (XEXP (x, 1), 0);
4628 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4629 term = XEXP (term, 0);
4631 if (GET_CODE (term) != UNSPEC
4632 || XINT (term, 1) != UNSPEC_GOTOFF)
4633 return x;
4635 term = XVECEXP (term, 0, 0);
4637 if (GET_CODE (term) != SYMBOL_REF
4638 && GET_CODE (term) != LABEL_REF)
4639 return x;
4641 return term;
4644 /* Determine if a given CONST RTX is a valid memory displacement
4645 in PIC mode. */
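/* For instance, in 32-bit PIC code (const (unspec [(symbol_ref "x")]
   UNSPEC_GOTOFF)), optionally wrapped in a PLUS with a CONST_INT, is a
   valid displacement for a local "x"; a bare (symbol_ref "x") is not.  */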
4648 legitimate_pic_address_disp_p (disp)
4649 register rtx disp;
4651 /* In 64bit mode we can allow direct addresses of symbols and labels
4652 when they are not dynamic symbols. */
4653 if (TARGET_64BIT)
4655 rtx x = disp;
4656 if (GET_CODE (disp) == CONST)
4657 x = XEXP (disp, 0);
4658 /* ??? Handle PIC code models */
4659 if (GET_CODE (x) == PLUS
4660 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4661 && ix86_cmodel == CM_SMALL_PIC
4662 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4663 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4664 x = XEXP (x, 0);
4665 if (local_symbolic_operand (x, Pmode))
4666 return 1;
4668 if (GET_CODE (disp) != CONST)
4669 return 0;
4670 disp = XEXP (disp, 0);
4672 if (TARGET_64BIT)
4674 /* It is unsafe to allow PLUS expressions here; this limits the allowed
4675 distance of GOT table references. We should not need these anyway. */
4676 if (GET_CODE (disp) != UNSPEC
4677 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4678 return 0;
4680 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4681 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4682 return 0;
4683 return 1;
4686 if (GET_CODE (disp) == PLUS)
4688 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4689 return 0;
4690 disp = XEXP (disp, 0);
4693 if (GET_CODE (disp) != UNSPEC)
4694 return 0;
4696 /* Must be @GOT or @GOTOFF. */
4697 switch (XINT (disp, 1))
4699 case UNSPEC_GOT:
4700 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4701 case UNSPEC_GOTOFF:
4702 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4705 return 0;
4708 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4709 memory address for an instruction. The MODE argument is the machine mode
4710 for the MEM expression that wants to use this address.
4712 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4713 convert common non-canonical forms to canonical form so that they will
4714 be recognized. */
4717 legitimate_address_p (mode, addr, strict)
4718 enum machine_mode mode;
4719 register rtx addr;
4720 int strict;
4722 struct ix86_address parts;
4723 rtx base, index, disp;
4724 HOST_WIDE_INT scale;
4725 const char *reason = NULL;
4726 rtx reason_rtx = NULL_RTX;
4728 if (TARGET_DEBUG_ADDR)
4730 fprintf (stderr,
4731 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4732 GET_MODE_NAME (mode), strict);
4733 debug_rtx (addr);
4736 if (ix86_decompose_address (addr, &parts) <= 0)
4738 reason = "decomposition failed";
4739 goto report_error;
4742 base = parts.base;
4743 index = parts.index;
4744 disp = parts.disp;
4745 scale = parts.scale;
4747 /* Validate base register.
4749 Don't allow SUBREGs here; they can lead to spill failures when the base
4750 is one word out of a two-word structure, which is represented internally
4751 as a DImode int. */
4753 if (base)
4755 rtx reg;
4756 reason_rtx = base;
4758 if (GET_CODE (base) == SUBREG)
4759 reg = SUBREG_REG (base);
4760 else
4761 reg = base;
4763 if (GET_CODE (reg) != REG)
4765 reason = "base is not a register";
4766 goto report_error;
4769 if (GET_MODE (base) != Pmode)
4771 reason = "base is not in Pmode";
4772 goto report_error;
4775 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
4776 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
4778 reason = "base is not valid";
4779 goto report_error;
4783 /* Validate index register.
4785 Don't allow SUBREGs here; they can lead to spill failures when the index
4786 is one word out of a two-word structure, which is represented internally
4787 as a DImode int. */
4789 if (index)
4791 rtx reg;
4792 reason_rtx = index;
4794 if (GET_CODE (index) == SUBREG)
4795 reg = SUBREG_REG (index);
4796 else
4797 reg = index;
4799 if (GET_CODE (reg) != REG)
4801 reason = "index is not a register";
4802 goto report_error;
4805 if (GET_MODE (index) != Pmode)
4807 reason = "index is not in Pmode";
4808 goto report_error;
4811 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
4812 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
4814 reason = "index is not valid";
4815 goto report_error;
4819 /* Validate scale factor. */
4820 if (scale != 1)
4822 reason_rtx = GEN_INT (scale);
4823 if (!index)
4825 reason = "scale without index";
4826 goto report_error;
4829 if (scale != 2 && scale != 4 && scale != 8)
4831 reason = "scale is not a valid multiplier";
4832 goto report_error;
4836 /* Validate displacement. */
4837 if (disp)
4839 reason_rtx = disp;
4841 if (!CONSTANT_ADDRESS_P (disp))
4843 reason = "displacement is not constant";
4844 goto report_error;
4847 if (TARGET_64BIT)
4849 if (!x86_64_sign_extended_value (disp))
4851 reason = "displacement is out of range";
4852 goto report_error;
4855 else
4857 if (GET_CODE (disp) == CONST_DOUBLE)
4859 reason = "displacement is a const_double";
4860 goto report_error;
4864 if (flag_pic && SYMBOLIC_CONST (disp))
4866 if (TARGET_64BIT && (index || base))
4868 reason = "non-constant pic memory reference";
4869 goto report_error;
4871 if (! legitimate_pic_address_disp_p (disp))
4873 reason = "displacement is an invalid pic construct";
4874 goto report_error;
4877 /* This code used to verify that a symbolic pic displacement
4878 includes the pic_offset_table_rtx register.
4880 While this is a good idea, unfortunately these constructs may
4881 be created by the "adds using lea" optimization for incorrect
4882 code like:
4884 int a;
4885 int foo(int i)
4887 return *(&a+i);
4890 This code is nonsensical, but results in addressing the
4891 GOT table with a pic_offset_table_rtx base. We can't
4892 just refuse it easily, since it gets matched by the
4893 "addsi3" pattern, which later gets split to lea in the
4894 case the output register differs from the input. While this
4895 could be handled by a separate addsi pattern for this case
4896 that never results in lea, disabling this test seems to be the
4897 easier and correct fix for the crash. */
4899 else if (HALF_PIC_P ())
4901 if (! HALF_PIC_ADDRESS_P (disp)
4902 || (base != NULL_RTX || index != NULL_RTX))
4904 reason = "displacement is an invalid half-pic reference";
4905 goto report_error;
4910 /* Everything looks valid. */
4911 if (TARGET_DEBUG_ADDR)
4912 fprintf (stderr, "Success.\n");
4913 return TRUE;
4915 report_error:
4916 if (TARGET_DEBUG_ADDR)
4918 fprintf (stderr, "Error: %s\n", reason);
4919 debug_rtx (reason_rtx);
4921 return FALSE;
4924 /* Return a unique alias set for the GOT. */
4926 static HOST_WIDE_INT
4927 ix86_GOT_alias_set ()
4929 static HOST_WIDE_INT set = -1;
4930 if (set == -1)
4931 set = new_alias_set ();
4932 return set;
4935 /* Return a legitimate reference for ORIG (an address) using the
4936 register REG. If REG is 0, a new pseudo is generated.
4938 There are two types of references that must be handled:
4940 1. Global data references must load the address from the GOT, via
4941 the PIC reg. An insn is emitted to do this load, and the reg is
4942 returned.
4944 2. Static data references, constant pool addresses, and code labels
4945 compute the address as an offset from the GOT, whose base is in
4946 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4947 differentiate them from global data objects. The returned
4948 address is the PIC reg + an unspec constant.
4950 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4951 reg also appears in the address. */
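/* Concretely, in 32-bit PIC code a global object is reached by
   movl x@GOT(%ebx), %reg and a dereference of %reg, while a static
   object is addressed directly as y@GOTOFF(%ebx).  */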
4954 legitimize_pic_address (orig, reg)
4955 rtx orig;
4956 rtx reg;
4958 rtx addr = orig;
4959 rtx new = orig;
4960 rtx base;
4962 if (local_symbolic_operand (addr, Pmode))
4964 /* In 64bit mode we can address such objects directly. */
4965 if (TARGET_64BIT)
4966 new = addr;
4967 else
4969 /* This symbol may be referenced via a displacement from the PIC
4970 base address (@GOTOFF). */
4972 current_function_uses_pic_offset_table = 1;
4973 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4974 new = gen_rtx_CONST (Pmode, new);
4975 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4977 if (reg != 0)
4979 emit_move_insn (reg, new);
4980 new = reg;
4984 else if (GET_CODE (addr) == SYMBOL_REF)
4986 if (TARGET_64BIT)
4988 current_function_uses_pic_offset_table = 1;
4989 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
4990 new = gen_rtx_CONST (Pmode, new);
4991 new = gen_rtx_MEM (Pmode, new);
4992 RTX_UNCHANGING_P (new) = 1;
4993 set_mem_alias_set (new, ix86_GOT_alias_set ());
4995 if (reg == 0)
4996 reg = gen_reg_rtx (Pmode);
4997 /* Use gen_movsi directly; otherwise the address is loaded
4998 into a register for CSE. We don't want to CSE these addresses;
4999 instead we CSE the addresses from the GOT table, so skip this. */
5000 emit_insn (gen_movsi (reg, new));
5001 new = reg;
5003 else
5005 /* This symbol must be referenced via a load from the
5006 Global Offset Table (@GOT). */
5008 current_function_uses_pic_offset_table = 1;
5009 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010 new = gen_rtx_CONST (Pmode, new);
5011 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5012 new = gen_rtx_MEM (Pmode, new);
5013 RTX_UNCHANGING_P (new) = 1;
5014 set_mem_alias_set (new, ix86_GOT_alias_set ());
5016 if (reg == 0)
5017 reg = gen_reg_rtx (Pmode);
5018 emit_move_insn (reg, new);
5019 new = reg;
5022 else
5024 if (GET_CODE (addr) == CONST)
5026 addr = XEXP (addr, 0);
5028 /* We must match stuff we generated before. Assume the only
5029 unspecs that can get here are ours. Not that we could do
5030 anything with them anyway... */
5031 if (GET_CODE (addr) == UNSPEC
5032 || (GET_CODE (addr) == PLUS
5033 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5034 return orig;
5035 if (GET_CODE (addr) != PLUS)
5036 abort ();
5038 if (GET_CODE (addr) == PLUS)
5040 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5042 /* Check first to see if this is a constant offset from a @GOTOFF
5043 symbol reference. */
5044 if (local_symbolic_operand (op0, Pmode)
5045 && GET_CODE (op1) == CONST_INT)
5047 if (!TARGET_64BIT)
5049 current_function_uses_pic_offset_table = 1;
5050 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5051 UNSPEC_GOTOFF);
5052 new = gen_rtx_PLUS (Pmode, new, op1);
5053 new = gen_rtx_CONST (Pmode, new);
5054 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5056 if (reg != 0)
5058 emit_move_insn (reg, new);
5059 new = reg;
5062 else
5064 /* ??? We need to limit offsets here. */
5067 else
5069 base = legitimize_pic_address (XEXP (addr, 0), reg);
5070 new = legitimize_pic_address (XEXP (addr, 1),
5071 base == reg ? NULL_RTX : reg);
5073 if (GET_CODE (new) == CONST_INT)
5074 new = plus_constant (base, INTVAL (new));
5075 else
5077 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5079 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5080 new = XEXP (new, 1);
5082 new = gen_rtx_PLUS (Pmode, base, new);
5087 return new;
5090 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
5091 may access it directly in the GOT. */
5093 static void
5094 i386_encode_section_info (decl, first)
5095 tree decl;
5096 int first ATTRIBUTE_UNUSED;
5098 if (flag_pic)
5100 rtx rtl = (TREE_CODE_CLASS (TREE_CODE (decl)) != 'd'
5101 ? TREE_CST_RTL (decl) : DECL_RTL (decl));
5103 if (GET_CODE (rtl) == MEM)
5105 if (TARGET_DEBUG_ADDR
5106 && TREE_CODE_CLASS (TREE_CODE (decl)) == 'd')
5108 fprintf (stderr, "Encode %s, public = %d\n",
5109 IDENTIFIER_POINTER (DECL_NAME (decl)),
5110 TREE_PUBLIC (decl));
5113 SYMBOL_REF_FLAG (XEXP (rtl, 0))
5114 = (TREE_CODE_CLASS (TREE_CODE (decl)) != 'd'
5115 || ! TREE_PUBLIC (decl)
5116 || MODULE_LOCAL_P (decl));
5121 /* Try machine-dependent ways of modifying an illegitimate address
5122 to be legitimate. If we find one, return the new, valid address.
5123 This macro is used in only one place: `memory_address' in explow.c.
5125 OLDX is the address as it was before break_out_memory_refs was called.
5126 In some cases it is useful to look at this to decide what needs to be done.
5128 MODE and WIN are passed so that this macro can use
5129 GO_IF_LEGITIMATE_ADDRESS.
5131 It is always safe for this macro to do nothing. It exists to recognize
5132 opportunities to optimize the output.
5134 For the 80386, we handle X+REG by loading X into a register R and
5135 using R+REG. R will go in a general reg and indexing will be used.
5136 However, if REG is a broken-out memory address or multiplication,
5137 nothing needs to be done because REG can certainly go in a general reg.
5139 When -fpic is used, special handling is needed for symbolic references.
5140 See comments by legitimize_pic_address in i386.c for details. */
5143 legitimize_address (x, oldx, mode)
5144 register rtx x;
5145 register rtx oldx ATTRIBUTE_UNUSED;
5146 enum machine_mode mode;
5148 int changed = 0;
5149 unsigned log;
5151 if (TARGET_DEBUG_ADDR)
5153 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5154 GET_MODE_NAME (mode));
5155 debug_rtx (x);
5158 if (flag_pic && SYMBOLIC_CONST (x))
5159 return legitimize_pic_address (x, 0);
5161 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5162 if (GET_CODE (x) == ASHIFT
5163 && GET_CODE (XEXP (x, 1)) == CONST_INT
5164 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5166 changed = 1;
5167 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5168 GEN_INT (1 << log));
5171 if (GET_CODE (x) == PLUS)
5173 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5175 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5176 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5177 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5179 changed = 1;
5180 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5181 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5182 GEN_INT (1 << log));
5185 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5186 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5187 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5189 changed = 1;
5190 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5191 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5192 GEN_INT (1 << log));
5195 /* Put multiply first if it isn't already. */
5196 if (GET_CODE (XEXP (x, 1)) == MULT)
5198 rtx tmp = XEXP (x, 0);
5199 XEXP (x, 0) = XEXP (x, 1);
5200 XEXP (x, 1) = tmp;
5201 changed = 1;
5204 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5205 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5206 created by virtual register instantiation, register elimination, and
5207 similar optimizations. */
5208 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5210 changed = 1;
5211 x = gen_rtx_PLUS (Pmode,
5212 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5213 XEXP (XEXP (x, 1), 0)),
5214 XEXP (XEXP (x, 1), 1));
5217 /* Canonicalize
5218 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5219 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5220 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5221 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5222 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5223 && CONSTANT_P (XEXP (x, 1)))
5225 rtx constant;
5226 rtx other = NULL_RTX;
5228 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5230 constant = XEXP (x, 1);
5231 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5233 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5235 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5236 other = XEXP (x, 1);
5238 else
5239 constant = 0;
5241 if (constant)
5243 changed = 1;
5244 x = gen_rtx_PLUS (Pmode,
5245 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5246 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5247 plus_constant (other, INTVAL (constant)));
5251 if (changed && legitimate_address_p (mode, x, FALSE))
5252 return x;
5254 if (GET_CODE (XEXP (x, 0)) == MULT)
5256 changed = 1;
5257 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5260 if (GET_CODE (XEXP (x, 1)) == MULT)
5262 changed = 1;
5263 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5266 if (changed
5267 && GET_CODE (XEXP (x, 1)) == REG
5268 && GET_CODE (XEXP (x, 0)) == REG)
5269 return x;
5271 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5273 changed = 1;
5274 x = legitimize_pic_address (x, 0);
5277 if (changed && legitimate_address_p (mode, x, FALSE))
5278 return x;
5280 if (GET_CODE (XEXP (x, 0)) == REG)
5282 register rtx temp = gen_reg_rtx (Pmode);
5283 register rtx val = force_operand (XEXP (x, 1), temp);
5284 if (val != temp)
5285 emit_move_insn (temp, val);
5287 XEXP (x, 1) = temp;
5288 return x;
5291 else if (GET_CODE (XEXP (x, 1)) == REG)
5293 register rtx temp = gen_reg_rtx (Pmode);
5294 register rtx val = force_operand (XEXP (x, 0), temp);
5295 if (val != temp)
5296 emit_move_insn (temp, val);
5298 XEXP (x, 0) = temp;
5299 return x;
5303 return x;
5306 /* Print an integer constant expression in assembler syntax. Addition
5307 and subtraction are the only arithmetic that may appear in these
5308 expressions. FILE is the stdio stream to write to, X is the rtx, and
5309 CODE is the operand print code from the output string. */
5311 static void
5312 output_pic_addr_const (file, x, code)
5313 FILE *file;
5314 rtx x;
5315 int code;
5317 char buf[256];
5319 switch (GET_CODE (x))
5321 case PC:
5322 if (flag_pic)
5323 putc ('.', file);
5324 else
5325 abort ();
5326 break;
5328 case SYMBOL_REF:
5329 assemble_name (file, XSTR (x, 0));
5330 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5331 fputs ("@PLT", file);
5332 break;
5334 case LABEL_REF:
5335 x = XEXP (x, 0);
5336 /* FALLTHRU */
5337 case CODE_LABEL:
5338 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5339 assemble_name (asm_out_file, buf);
5340 break;
5342 case CONST_INT:
5343 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5344 break;
5346 case CONST:
5347 /* This used to output parentheses around the expression,
5348 but that does not work on the 386 (either ATT or BSD assembler). */
5349 output_pic_addr_const (file, XEXP (x, 0), code);
5350 break;
5352 case CONST_DOUBLE:
5353 if (GET_MODE (x) == VOIDmode)
5355 /* We can use %d if the number is <32 bits and positive. */
5356 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5357 fprintf (file, "0x%lx%08lx",
5358 (unsigned long) CONST_DOUBLE_HIGH (x),
5359 (unsigned long) CONST_DOUBLE_LOW (x));
5360 else
5361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5363 else
5364 /* We can't handle floating point constants;
5365 PRINT_OPERAND must handle them. */
5366 output_operand_lossage ("floating constant misused");
5367 break;
5369 case PLUS:
5370 /* Some assemblers need integer constants to appear first. */
5371 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5373 output_pic_addr_const (file, XEXP (x, 0), code);
5374 putc ('+', file);
5375 output_pic_addr_const (file, XEXP (x, 1), code);
5377 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5379 output_pic_addr_const (file, XEXP (x, 1), code);
5380 putc ('+', file);
5381 output_pic_addr_const (file, XEXP (x, 0), code);
5383 else
5384 abort ();
5385 break;
5387 case MINUS:
5388 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5389 output_pic_addr_const (file, XEXP (x, 0), code);
5390 putc ('-', file);
5391 output_pic_addr_const (file, XEXP (x, 1), code);
5392 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5393 break;
5395 case UNSPEC:
5396 if (XVECLEN (x, 0) != 1)
5397 abort ();
5398 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5399 switch (XINT (x, 1))
5401 case UNSPEC_GOT:
5402 fputs ("@GOT", file);
5403 break;
5404 case UNSPEC_GOTOFF:
5405 fputs ("@GOTOFF", file);
5406 break;
5407 case UNSPEC_PLT:
5408 fputs ("@PLT", file);
5409 break;
5410 case UNSPEC_GOTPCREL:
5411 fputs ("@GOTPCREL(%RIP)", file);
5412 break;
5413 default:
5414 output_operand_lossage ("invalid UNSPEC as operand");
5415 break;
5417 break;
5419 default:
5420 output_operand_lossage ("invalid expression as operand");
5424 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5425 We need to handle our special PIC relocations. */
5427 void
5428 i386_dwarf_output_addr_const (file, x)
5429 FILE *file;
5430 rtx x;
5432 #ifdef ASM_QUAD
5433 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5434 #else
5435 if (TARGET_64BIT)
5436 abort ();
5437 fprintf (file, "%s", ASM_LONG);
5438 #endif
5439 if (flag_pic)
5440 output_pic_addr_const (file, x, '\0');
5441 else
5442 output_addr_const (file, x);
5443 fputc ('\n', file);
5446 /* In the name of slightly smaller debug output, and to cater to
5447 general assembler lossage, recognize PIC+GOTOFF and turn it back
5448 into a direct symbol reference. */
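/* E.g. (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   comes back out as the plain (symbol_ref "x") for the debug output.  */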
5451 i386_simplify_dwarf_addr (orig_x)
5452 rtx orig_x;
5454 rtx x = orig_x, y;
5456 if (GET_CODE (x) == MEM)
5457 x = XEXP (x, 0);
5459 if (TARGET_64BIT)
5461 if (GET_CODE (x) != CONST
5462 || GET_CODE (XEXP (x, 0)) != UNSPEC
5463 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5464 || GET_CODE (orig_x) != MEM)
5465 return orig_x;
5466 return XVECEXP (XEXP (x, 0), 0, 0);
5469 if (GET_CODE (x) != PLUS
5470 || GET_CODE (XEXP (x, 1)) != CONST)
5471 return orig_x;
5473 if (GET_CODE (XEXP (x, 0)) == REG
5474 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5475 /* %ebx + GOT/GOTOFF */
5476 y = NULL;
5477 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5479 /* %ebx + %reg * scale + GOT/GOTOFF */
5480 y = XEXP (x, 0);
5481 if (GET_CODE (XEXP (y, 0)) == REG
5482 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5483 y = XEXP (y, 1);
5484 else if (GET_CODE (XEXP (y, 1)) == REG
5485 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5486 y = XEXP (y, 0);
5487 else
5488 return orig_x;
5489 if (GET_CODE (y) != REG
5490 && GET_CODE (y) != MULT
5491 && GET_CODE (y) != ASHIFT)
5492 return orig_x;
5494 else
5495 return orig_x;
5497 x = XEXP (XEXP (x, 1), 0);
5498 if (GET_CODE (x) == UNSPEC
5499 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5500 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5502 if (y)
5503 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5504 return XVECEXP (x, 0, 0);
5507 if (GET_CODE (x) == PLUS
5508 && GET_CODE (XEXP (x, 0)) == UNSPEC
5509 && GET_CODE (XEXP (x, 1)) == CONST_INT
5510 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5511 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5512 && GET_CODE (orig_x) != MEM)))
5514 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5515 if (y)
5516 return gen_rtx_PLUS (Pmode, y, x);
5517 return x;
5520 return orig_x;
5523 static void
5524 put_condition_code (code, mode, reverse, fp, file)
5525 enum rtx_code code;
5526 enum machine_mode mode;
5527 int reverse, fp;
5528 FILE *file;
5530 const char *suffix;
5532 if (mode == CCFPmode || mode == CCFPUmode)
5534 enum rtx_code second_code, bypass_code;
5535 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5536 if (bypass_code != NIL || second_code != NIL)
5537 abort ();
5538 code = ix86_fp_compare_code_to_integer (code);
5539 mode = CCmode;
5541 if (reverse)
5542 code = reverse_condition (code);
5544 switch (code)
5546 case EQ:
5547 suffix = "e";
5548 break;
5549 case NE:
5550 suffix = "ne";
5551 break;
5552 case GT:
5553 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5554 abort ();
5555 suffix = "g";
5556 break;
5557 case GTU:
5558 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5559 Those same assemblers have the same but opposite lossage on cmov. */
5560 if (mode != CCmode)
5561 abort ();
5562 suffix = fp ? "nbe" : "a";
5563 break;
5564 case LT:
5565 if (mode == CCNOmode || mode == CCGOCmode)
5566 suffix = "s";
5567 else if (mode == CCmode || mode == CCGCmode)
5568 suffix = "l";
5569 else
5570 abort ();
5571 break;
5572 case LTU:
5573 if (mode != CCmode)
5574 abort ();
5575 suffix = "b";
5576 break;
5577 case GE:
5578 if (mode == CCNOmode || mode == CCGOCmode)
5579 suffix = "ns";
5580 else if (mode == CCmode || mode == CCGCmode)
5581 suffix = "ge";
5582 else
5583 abort ();
5584 break;
5585 case GEU:
5586 /* ??? As above. */
5587 if (mode != CCmode)
5588 abort ();
5589 suffix = fp ? "nb" : "ae";
5590 break;
5591 case LE:
5592 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5593 abort ();
5594 suffix = "le";
5595 break;
5596 case LEU:
5597 if (mode != CCmode)
5598 abort ();
5599 suffix = "be";
5600 break;
5601 case UNORDERED:
5602 suffix = fp ? "u" : "p";
5603 break;
5604 case ORDERED:
5605 suffix = fp ? "nu" : "np";
5606 break;
5607 default:
5608 abort ();
5610 fputs (suffix, file);
5613 void
5614 print_reg (x, code, file)
5615 rtx x;
5616 int code;
5617 FILE *file;
5619 if (REGNO (x) == ARG_POINTER_REGNUM
5620 || REGNO (x) == FRAME_POINTER_REGNUM
5621 || REGNO (x) == FLAGS_REG
5622 || REGNO (x) == FPSR_REG)
5623 abort ();
5625 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
5626 putc ('%', file);
5628 if (code == 'w' || MMX_REG_P (x))
5629 code = 2;
5630 else if (code == 'b')
5631 code = 1;
5632 else if (code == 'k')
5633 code = 4;
5634 else if (code == 'q')
5635 code = 8;
5636 else if (code == 'y')
5637 code = 3;
5638 else if (code == 'h')
5639 code = 0;
5640 else
5641 code = GET_MODE_SIZE (GET_MODE (x));
5643 /* Irritatingly, AMD extended registers use a different naming convention
5644 from the normal registers. */
5645 if (REX_INT_REG_P (x))
5647 if (!TARGET_64BIT)
5648 abort ();
5649 switch (code)
5651 case 0:
5652 error ("extended registers have no high halves");
5653 break;
5654 case 1:
5655 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5656 break;
5657 case 2:
5658 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5659 break;
5660 case 4:
5661 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5662 break;
5663 case 8:
5664 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5665 break;
5666 default:
5667 error ("unsupported operand size for extended register");
5668 break;
5670 return;
5672 switch (code)
5674 case 3:
5675 if (STACK_TOP_P (x))
5677 fputs ("st(0)", file);
5678 break;
5680 /* FALLTHRU */
5681 case 8:
5682 case 4:
5683 case 12:
5684 if (! ANY_FP_REG_P (x))
5685 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5686 /* FALLTHRU */
5687 case 16:
5688 case 2:
5689 fputs (hi_reg_name[REGNO (x)], file);
5690 break;
5691 case 1:
5692 fputs (qi_reg_name[REGNO (x)], file);
5693 break;
5694 case 0:
5695 fputs (qi_high_reg_name[REGNO (x)], file);
5696 break;
5697 default:
5698 abort ();
5702 /* Meaning of CODE:
5703 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5704 C -- print opcode suffix for set/cmov insn.
5705 c -- like C, but print reversed condition
5706 F,f -- likewise, but for floating-point.
5707 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5708 nothing
5709 R -- print the prefix for register names.
5710 z -- print the opcode suffix for the size of the current operand.
5711 * -- print a star (in certain assembler syntax)
5712 A -- print an absolute memory reference.
5713 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5714 s -- print a shift double count, followed by the assembler's argument
5715 delimiter.
5716 b -- print the QImode name of the register for the indicated operand.
5717 %b0 would print %al if operands[0] is reg 0.
5718 w -- likewise, print the HImode name of the register.
5719 k -- likewise, print the SImode name of the register.
5720 q -- likewise, print the DImode name of the register.
5721 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5722 y -- print "st(0)" instead of "st" as a register.
5723 D -- print condition for SSE cmp instruction.
5724 P -- if PIC, print an @PLT suffix.
5725 X -- don't print any sort of PIC '@' suffix for a symbol.
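/* So, for instance, if operands[0] is (reg:SI 0), then "%b0" prints
   "%al", "%w0" prints "%ax", "%k0" prints "%eax" and "%h0" prints "%ah".  */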
5728 void
5729 print_operand (file, x, code)
5730 FILE *file;
5731 rtx x;
5732 int code;
5734 if (code)
5736 switch (code)
5738 case '*':
5739 if (ASSEMBLER_DIALECT == ASM_ATT)
5740 putc ('*', file);
5741 return;
5743 case 'A':
5744 if (ASSEMBLER_DIALECT == ASM_ATT)
5745 putc ('*', file);
5746 else if (ASSEMBLER_DIALECT == ASM_INTEL)
5748 /* Intel syntax. For absolute addresses, registers should not
5749 be surrounded by brackets. */
5750 if (GET_CODE (x) != REG)
5752 putc ('[', file);
5753 PRINT_OPERAND (file, x, 0);
5754 putc (']', file);
5755 return;
5758 else
5759 abort ();
5761 PRINT_OPERAND (file, x, 0);
5762 return;
5765 case 'L':
5766 if (ASSEMBLER_DIALECT == ASM_ATT)
5767 putc ('l', file);
5768 return;
5770 case 'W':
5771 if (ASSEMBLER_DIALECT == ASM_ATT)
5772 putc ('w', file);
5773 return;
5775 case 'B':
5776 if (ASSEMBLER_DIALECT == ASM_ATT)
5777 putc ('b', file);
5778 return;
5780 case 'Q':
5781 if (ASSEMBLER_DIALECT == ASM_ATT)
5782 putc ('l', file);
5783 return;
5785 case 'S':
5786 if (ASSEMBLER_DIALECT == ASM_ATT)
5787 putc ('s', file);
5788 return;
5790 case 'T':
5791 if (ASSEMBLER_DIALECT == ASM_ATT)
5792 putc ('t', file);
5793 return;
5795 case 'z':
5796 /* 387 opcodes don't get size suffixes if the operands are
5797 registers. */
5798 if (STACK_REG_P (x))
5799 return;
5801 /* Likewise if using Intel opcodes. */
5802 if (ASSEMBLER_DIALECT == ASM_INTEL)
5803 return;
5805 /* Derive the opcode suffix from the size of the operand. */
5806 switch (GET_MODE_SIZE (GET_MODE (x)))
5808 case 2:
5809 #ifdef HAVE_GAS_FILDS_FISTS
5810 putc ('s', file);
5811 #endif
5812 return;
5814 case 4:
5815 if (GET_MODE (x) == SFmode)
5817 putc ('s', file);
5818 return;
5820 else
5821 putc ('l', file);
5822 return;
5824 case 12:
5825 case 16:
5826 putc ('t', file);
5827 return;
5829 case 8:
5830 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5832 #ifdef GAS_MNEMONICS
5833 putc ('q', file);
5834 #else
5835 putc ('l', file);
5836 putc ('l', file);
5837 #endif
5839 else
5840 putc ('l', file);
5841 return;
5843 default:
5844 abort ();
5847 case 'b':
5848 case 'w':
5849 case 'k':
5850 case 'q':
5851 case 'h':
5852 case 'y':
5853 case 'X':
5854 case 'P':
5855 break;
5857 case 's':
5858 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5860 PRINT_OPERAND (file, x, 0);
5861 putc (',', file);
5863 return;
5865 case 'D':
5866 /* A little bit of brain damage here. The SSE compare instructions
5867 use completely different names for the comparisons than the
5868 fp conditional moves do. */
5869 switch (GET_CODE (x))
5871 case EQ:
5872 case UNEQ:
5873 fputs ("eq", file);
5874 break;
5875 case LT:
5876 case UNLT:
5877 fputs ("lt", file);
5878 break;
5879 case LE:
5880 case UNLE:
5881 fputs ("le", file);
5882 break;
5883 case UNORDERED:
5884 fputs ("unord", file);
5885 break;
5886 case NE:
5887 case LTGT:
5888 fputs ("neq", file);
5889 break;
5890 case UNGE:
5891 case GE:
5892 fputs ("nlt", file);
5893 break;
5894 case UNGT:
5895 case GT:
5896 fputs ("nle", file);
5897 break;
5898 case ORDERED:
5899 fputs ("ord", file);
5900 break;
5901 default:
5902 abort ();
5903 break;
5905 return;
5906 case 'O':
5907 #ifdef CMOV_SUN_AS_SYNTAX
5908 if (ASSEMBLER_DIALECT == ASM_ATT)
5910 switch (GET_MODE (x))
5912 case HImode: putc ('w', file); break;
5913 case SImode:
5914 case SFmode: putc ('l', file); break;
5915 case DImode:
5916 case DFmode: putc ('q', file); break;
5917 default: abort ();
5919 putc ('.', file);
5921 #endif
5922 return;
5923 case 'C':
5924 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5925 return;
5926 case 'F':
5927 #ifdef CMOV_SUN_AS_SYNTAX
5928 if (ASSEMBLER_DIALECT == ASM_ATT)
5929 putc ('.', file);
5930 #endif
5931 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5932 return;
5934 /* Like above, but reverse condition */
5935 case 'c':
5936 /* Check to see if argument to %c is really a constant
5937 and not a condition code which needs to be reversed. */
5938 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5940 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5941 return;
5943 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5944 return;
5945 case 'f':
5946 #ifdef CMOV_SUN_AS_SYNTAX
5947 if (ASSEMBLER_DIALECT == ASM_ATT)
5948 putc ('.', file);
5949 #endif
5950 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5951 return;
5952 case '+':
5954 rtx x;
5956 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5957 return;
5959 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5960 if (x)
5962 int pred_val = INTVAL (XEXP (x, 0));
5964 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5965 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5967 int taken = pred_val > REG_BR_PROB_BASE / 2;
5968 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5970 /* Emit hints only in the case the default branch prediction
5971 heuristics would fail. */
5972 if (taken != cputaken)
5974 /* We use 3e (DS) prefix for taken branches and
5975 2e (CS) prefix for not taken branches. */
5976 if (taken)
5977 fputs ("ds ; ", file);
5978 else
5979 fputs ("cs ; ", file);
5983 return;
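      /* Illustration (added; not part of the original sources): on a
	 target with branch hints, a jump predicted taken against the
	 static forward-not-taken rule would come out as

	     ds ; jne .L3        # 0x3e prefix: hint "taken"

	 and one predicted not taken against the backward-taken rule as

	     cs ; jle .L7        # 0x2e prefix: hint "not taken"
	 */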
5985 default:
5986 output_operand_lossage ("invalid operand code `%c'", code);
5990 if (GET_CODE (x) == REG)
5992 PRINT_REG (x, code, file);
5995 else if (GET_CODE (x) == MEM)
5997 /* No `byte ptr' prefix for call instructions. */
5998 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6000 const char * size;
6001 switch (GET_MODE_SIZE (GET_MODE (x)))
6003 case 1: size = "BYTE"; break;
6004 case 2: size = "WORD"; break;
6005 case 4: size = "DWORD"; break;
6006 case 8: size = "QWORD"; break;
6007 case 12: size = "XWORD"; break;
6008 case 16: size = "XMMWORD"; break;
6009 default:
6010 abort ();
6013 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6014 if (code == 'b')
6015 size = "BYTE";
6016 else if (code == 'w')
6017 size = "WORD";
6018 else if (code == 'k')
6019 size = "DWORD";
6021 fputs (size, file);
6022 fputs (" PTR ", file);
6025 x = XEXP (x, 0);
6026 if (flag_pic && CONSTANT_ADDRESS_P (x))
6027 output_pic_addr_const (file, x, code);
6028 /* Avoid (%rip) for call operands. */
6029 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6030 && GET_CODE (x) != CONST_INT)
6031 output_addr_const (file, x);
6032 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6033 output_operand_lossage ("invalid constraints for operand");
6034 else
6035 output_address (x);
6038 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6040 REAL_VALUE_TYPE r;
6041 long l;
6043 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6044 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6046 if (ASSEMBLER_DIALECT == ASM_ATT)
6047 putc ('$', file);
6048 fprintf (file, "0x%lx", l);
6051 /* These float cases don't actually occur as immediate operands. */
6052 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6054 REAL_VALUE_TYPE r;
6055 char dstr[30];
6057 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6058 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6059 fprintf (file, "%s", dstr);
6062 else if (GET_CODE (x) == CONST_DOUBLE
6063 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6065 REAL_VALUE_TYPE r;
6066 char dstr[30];
6068 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6069 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6070 fprintf (file, "%s", dstr);
6072 else
6074 if (code != 'P')
6076 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6078 if (ASSEMBLER_DIALECT == ASM_ATT)
6079 putc ('$', file);
6081 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6082 || GET_CODE (x) == LABEL_REF)
6084 if (ASSEMBLER_DIALECT == ASM_ATT)
6085 putc ('$', file);
6086 else
6087 fputs ("OFFSET FLAT:", file);
6090 if (GET_CODE (x) == CONST_INT)
6091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6092 else if (flag_pic)
6093 output_pic_addr_const (file, x, code);
6094 else
6095 output_addr_const (file, x);
6099 /* Print a memory operand whose address is ADDR. */
6101 void
6102 print_operand_address (file, addr)
6103 FILE *file;
6104 register rtx addr;
6106 struct ix86_address parts;
6107 rtx base, index, disp;
6108 int scale;
6110 if (! ix86_decompose_address (addr, &parts))
6111 abort ();
6113 base = parts.base;
6114 index = parts.index;
6115 disp = parts.disp;
6116 scale = parts.scale;
6118 if (!base && !index)
6120 /* A displacement-only address requires special attention. */
6122 if (GET_CODE (disp) == CONST_INT)
6124 if (ASSEMBLER_DIALECT == ASM_INTEL)
6126 if (USER_LABEL_PREFIX[0] == 0)
6127 putc ('%', file);
6128 fputs ("ds:", file);
6130 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6132 else if (flag_pic)
6133 output_pic_addr_const (file, addr, 0);
6134 else
6135 output_addr_const (file, addr);
6137 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6138 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6139 fputs ("(%rip)", file);
6141 else
6143 if (ASSEMBLER_DIALECT == ASM_ATT)
6145 if (disp)
6147 if (flag_pic)
6148 output_pic_addr_const (file, disp, 0);
6149 else if (GET_CODE (disp) == LABEL_REF)
6150 output_asm_label (disp);
6151 else
6152 output_addr_const (file, disp);
6155 putc ('(', file);
6156 if (base)
6157 PRINT_REG (base, 0, file);
6158 if (index)
6160 putc (',', file);
6161 PRINT_REG (index, 0, file);
6162 if (scale != 1)
6163 fprintf (file, ",%d", scale);
6165 putc (')', file);
6167 else
6169 rtx offset = NULL_RTX;
6171 if (disp)
6173 /* Pull out the offset of a symbol; print any symbol itself. */
6174 if (GET_CODE (disp) == CONST
6175 && GET_CODE (XEXP (disp, 0)) == PLUS
6176 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6178 offset = XEXP (XEXP (disp, 0), 1);
6179 disp = gen_rtx_CONST (VOIDmode,
6180 XEXP (XEXP (disp, 0), 0));
6183 if (flag_pic)
6184 output_pic_addr_const (file, disp, 0);
6185 else if (GET_CODE (disp) == LABEL_REF)
6186 output_asm_label (disp);
6187 else if (GET_CODE (disp) == CONST_INT)
6188 offset = disp;
6189 else
6190 output_addr_const (file, disp);
6193 putc ('[', file);
6194 if (base)
6196 PRINT_REG (base, 0, file);
6197 if (offset)
6199 if (INTVAL (offset) >= 0)
6200 putc ('+', file);
6201 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6204 else if (offset)
6205 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6206 else
6207 putc ('0', file);
6209 if (index)
6211 putc ('+', file);
6212 PRINT_REG (index, 0, file);
6213 if (scale != 1)
6214 fprintf (file, "*%d", scale);
6216 putc (']', file);
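/* Example (added; not part of the original sources): for the address
   ebp + eax*4 - 4 the two dialects come out as

       -4(%ebp,%eax,4)         AT&T
       [ebp-4+eax*4]           Intel

   matching the base/disp/index/scale order used above.  */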
6221 /* Split one or more DImode RTL references into pairs of SImode
6222 references. The RTL can be REG, offsettable MEM, integer constant, or
6223 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6224 split and "num" is its length. lo_half and hi_half are output arrays
6225 that parallel "operands". */
6227 void
6228 split_di (operands, num, lo_half, hi_half)
6229 rtx operands[];
6230 int num;
6231 rtx lo_half[], hi_half[];
6233 while (num--)
6235 rtx op = operands[num];
6237 /* simplify_subreg refuses to split volatile memory addresses,
6238 but we still have to handle them. */
6239 if (GET_CODE (op) == MEM)
6241 lo_half[num] = adjust_address (op, SImode, 0);
6242 hi_half[num] = adjust_address (op, SImode, 4);
6244 else
6246 lo_half[num] = simplify_gen_subreg (SImode, op,
6247 GET_MODE (op) == VOIDmode
6248 ? DImode : GET_MODE (op), 0);
6249 hi_half[num] = simplify_gen_subreg (SImode, op,
6250 GET_MODE (op) == VOIDmode
6251 ? DImode : GET_MODE (op), 4);
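/* Example (added; not part of the original sources): splitting the
   DImode constant 0x0000000100000002 yields

       lo_half[num] = (const_int 2)        bits 0..31
       hi_half[num] = (const_int 1)        bits 32..63

   via simplify_gen_subreg at byte offsets 0 and 4.  */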
6255 /* Split one or more TImode RTL references into pairs of DImode
6256 references. The RTL can be REG, offsettable MEM, integer constant, or
6257 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6258 split and "num" is its length. lo_half and hi_half are output arrays
6259 that parallel "operands". */
6261 void
6262 split_ti (operands, num, lo_half, hi_half)
6263 rtx operands[];
6264 int num;
6265 rtx lo_half[], hi_half[];
6267 while (num--)
6269 rtx op = operands[num];
6271 /* simplify_subreg refuses to split volatile memory addresses, but we
6272 still have to handle them. */
6273 if (GET_CODE (op) == MEM)
6275 lo_half[num] = adjust_address (op, DImode, 0);
6276 hi_half[num] = adjust_address (op, DImode, 8);
6278 else
6280 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6281 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6286 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6287 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6288 is the expression of the binary operation. The output may either be
6289 emitted here, or returned to the caller, like all output_* functions.
6291 There is no guarantee that the operands are the same mode, as they
6292 might be within FLOAT or FLOAT_EXTEND expressions. */
6294 #ifndef SYSV386_COMPAT
6295 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6296 wants to fix the assemblers because that causes incompatibility
6297 with gcc. No-one wants to fix gcc because that causes
6298 incompatibility with assemblers... You can use the option of
6299 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6300 #define SYSV386_COMPAT 1
6301 #endif
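/* Illustration (added; derived from the templates below): with
   SYSV386_COMPAT defined to 1, the operation "st(1) = st(1) - st(0),
   pop" is spelled

       fsubrp  %st, %st(1)     AT&T, SVR3.2-compatible mnemonic
       fsubp   st(1), st       Intel

   i.e. the fsub/fsubr (and fdiv/fdivr) mnemonics are swapped whenever
   the destination is not st(0).  */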
6303 const char *
6304 output_387_binary_op (insn, operands)
6305 rtx insn;
6306 rtx *operands;
6308 static char buf[30];
6309 const char *p;
6310 const char *ssep;
6311 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6313 #ifdef ENABLE_CHECKING
6314 /* Even if we do not want to check the inputs, this documents the input
6315 constraints, which helps in understanding the following code. */
6316 if (STACK_REG_P (operands[0])
6317 && ((REG_P (operands[1])
6318 && REGNO (operands[0]) == REGNO (operands[1])
6319 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6320 || (REG_P (operands[2])
6321 && REGNO (operands[0]) == REGNO (operands[2])
6322 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6323 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6324 ; /* ok */
6325 else if (!is_sse)
6326 abort ();
6327 #endif
6329 switch (GET_CODE (operands[3]))
6331 case PLUS:
6332 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6333 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6334 p = "fiadd";
6335 else
6336 p = "fadd";
6337 ssep = "add";
6338 break;
6340 case MINUS:
6341 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6342 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6343 p = "fisub";
6344 else
6345 p = "fsub";
6346 ssep = "sub";
6347 break;
6349 case MULT:
6350 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6351 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6352 p = "fimul";
6353 else
6354 p = "fmul";
6355 ssep = "mul";
6356 break;
6358 case DIV:
6359 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6360 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6361 p = "fidiv";
6362 else
6363 p = "fdiv";
6364 ssep = "div";
6365 break;
6367 default:
6368 abort ();
6371 if (is_sse)
6373 strcpy (buf, ssep);
6374 if (GET_MODE (operands[0]) == SFmode)
6375 strcat (buf, "ss\t{%2, %0|%0, %2}");
6376 else
6377 strcat (buf, "sd\t{%2, %0|%0, %2}");
6378 return buf;
6380 strcpy (buf, p);
6382 switch (GET_CODE (operands[3]))
6384 case MULT:
6385 case PLUS:
6386 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6388 rtx temp = operands[2];
6389 operands[2] = operands[1];
6390 operands[1] = temp;
6393 /* We know operands[0] == operands[1]. */
6395 if (GET_CODE (operands[2]) == MEM)
6397 p = "%z2\t%2";
6398 break;
6401 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6403 if (STACK_TOP_P (operands[0]))
6404 /* How is it that we are storing to a dead operand[2]?
6405 Well, presumably operands[1] is dead too. We can't
6406 store the result to st(0) as st(0) gets popped on this
6407 instruction. Instead store to operands[2] (which I
6408 think has to be st(1)). st(1) will be popped later.
6409 gcc <= 2.8.1 didn't have this check and generated
6410 assembly code that the Unixware assembler rejected. */
6411 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6412 else
6413 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6414 break;
6417 if (STACK_TOP_P (operands[0]))
6418 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6419 else
6420 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6421 break;
6423 case MINUS:
6424 case DIV:
6425 if (GET_CODE (operands[1]) == MEM)
6427 p = "r%z1\t%1";
6428 break;
6431 if (GET_CODE (operands[2]) == MEM)
6433 p = "%z2\t%2";
6434 break;
6437 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6439 #if SYSV386_COMPAT
6440 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6441 derived assemblers, confusingly reverse the direction of
6442 the operation for fsub{r} and fdiv{r} when the
6443 destination register is not st(0). The Intel assembler
6444 doesn't have this brain damage. Read !SYSV386_COMPAT to
6445 figure out what the hardware really does. */
6446 if (STACK_TOP_P (operands[0]))
6447 p = "{p\t%0, %2|rp\t%2, %0}";
6448 else
6449 p = "{rp\t%2, %0|p\t%0, %2}";
6450 #else
6451 if (STACK_TOP_P (operands[0]))
6452 /* As above for fmul/fadd, we can't store to st(0). */
6453 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6454 else
6455 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6456 #endif
6457 break;
6460 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6462 #if SYSV386_COMPAT
6463 if (STACK_TOP_P (operands[0]))
6464 p = "{rp\t%0, %1|p\t%1, %0}";
6465 else
6466 p = "{p\t%1, %0|rp\t%0, %1}";
6467 #else
6468 if (STACK_TOP_P (operands[0]))
6469 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6470 else
6471 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6472 #endif
6473 break;
6476 if (STACK_TOP_P (operands[0]))
6478 if (STACK_TOP_P (operands[1]))
6479 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6480 else
6481 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6482 break;
6484 else if (STACK_TOP_P (operands[1]))
6486 #if SYSV386_COMPAT
6487 p = "{\t%1, %0|r\t%0, %1}";
6488 #else
6489 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6490 #endif
6492 else
6494 #if SYSV386_COMPAT
6495 p = "{r\t%2, %0|\t%0, %2}";
6496 #else
6497 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6498 #endif
6500 break;
6502 default:
6503 abort ();
6506 strcat (buf, p);
6507 return buf;
6510 /* Output code to initialize control word copies used by
6511 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
6512 is set to a control word that rounds toward zero, as truncation requires. */
6513 void
6514 emit_i387_cw_initialization (normal, round_down)
6515 rtx normal, round_down;
6517 rtx reg = gen_reg_rtx (HImode);
6519 emit_insn (gen_x86_fnstcw_1 (normal));
6520 emit_move_insn (reg, normal);
6521 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6522 && !TARGET_64BIT)
6523 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6524 else
6525 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6526 emit_move_insn (round_down, reg);
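/* Example (added; not part of the original sources): the common case of
   the sequence emitted above is roughly

       fnstcw  NORMAL          # store the current control word
       movw    NORMAL, %ax
       orw     $0xc00, %ax     # RC bits 10-11 = 11: round toward zero
       movw    %ax, ROUND_DOWN

   so ROUND_DOWN holds a copy of the control word with truncating
   rounding, ready to be loaded by the trunc?f?i patterns.  */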
6529 /* Output code for INSN to convert a float to a signed int. OPERANDS
6530 are the insn operands. The output may be [HSD]Imode and the input
6531 operand may be [SDX]Fmode. */
6533 const char *
6534 output_fix_trunc (insn, operands)
6535 rtx insn;
6536 rtx *operands;
6538 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6539 int dimode_p = GET_MODE (operands[0]) == DImode;
6541 /* Jump through a hoop or two for DImode, since the hardware has no
6542 non-popping instruction. We used to do this a different way, but
6543 that was somewhat fragile and broke with post-reload splitters. */
6544 if (dimode_p && !stack_top_dies)
6545 output_asm_insn ("fld\t%y1", operands);
6547 if (!STACK_TOP_P (operands[1]))
6548 abort ();
6550 if (GET_CODE (operands[0]) != MEM)
6551 abort ();
6553 output_asm_insn ("fldcw\t%3", operands);
6554 if (stack_top_dies || dimode_p)
6555 output_asm_insn ("fistp%z0\t%0", operands);
6556 else
6557 output_asm_insn ("fist%z0\t%0", operands);
6558 output_asm_insn ("fldcw\t%2", operands);
6560 return "";
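/* Example (added; not part of the original sources): for an SImode
   destination whose input dies, the returned sequence is along the
   lines of

       fldcw   %3              # switch to the truncating control word
       fistpl  %0              # store st(0) as a 32-bit integer, popping
       fldcw   %2              # restore the original control word
   */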
6563 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6564 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6565 when fucom should be used. */
6567 const char *
6568 output_fp_compare (insn, operands, eflags_p, unordered_p)
6569 rtx insn;
6570 rtx *operands;
6571 int eflags_p, unordered_p;
6573 int stack_top_dies;
6574 rtx cmp_op0 = operands[0];
6575 rtx cmp_op1 = operands[1];
6576 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6578 if (eflags_p == 2)
6580 cmp_op0 = cmp_op1;
6581 cmp_op1 = operands[2];
6583 if (is_sse)
6585 if (GET_MODE (operands[0]) == SFmode)
6586 if (unordered_p)
6587 return "ucomiss\t{%1, %0|%0, %1}";
6588 else
6589 return "comiss\t{%1, %0|%0, %1}";
6590 else
6591 if (unordered_p)
6592 return "ucomisd\t{%1, %0|%0, %1}";
6593 else
6594 return "comisd\t{%1, %0|%0, %1}";
6597 if (! STACK_TOP_P (cmp_op0))
6598 abort ();
6600 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6602 if (STACK_REG_P (cmp_op1)
6603 && stack_top_dies
6604 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6605 && REGNO (cmp_op1) != FIRST_STACK_REG)
6607 /* If the top of the 387 stack dies, and the other operand
6608 is also a stack register that dies, then this must be a
6609 `fcompp' float compare. */
6611 if (eflags_p == 1)
6613 /* There is no double popping fcomi variant. Fortunately,
6614 eflags is immune from the fstp's cc clobbering. */
6615 if (unordered_p)
6616 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6617 else
6618 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6619 return "fstp\t%y0";
6621 else
6623 if (eflags_p == 2)
6625 if (unordered_p)
6626 return "fucompp\n\tfnstsw\t%0";
6627 else
6628 return "fcompp\n\tfnstsw\t%0";
6630 else
6632 if (unordered_p)
6633 return "fucompp";
6634 else
6635 return "fcompp";
6639 else
6641 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6643 static const char * const alt[24] =
6645 "fcom%z1\t%y1",
6646 "fcomp%z1\t%y1",
6647 "fucom%z1\t%y1",
6648 "fucomp%z1\t%y1",
6650 "ficom%z1\t%y1",
6651 "ficomp%z1\t%y1",
6652 NULL,
6653 NULL,
6655 "fcomi\t{%y1, %0|%0, %y1}",
6656 "fcomip\t{%y1, %0|%0, %y1}",
6657 "fucomi\t{%y1, %0|%0, %y1}",
6658 "fucomip\t{%y1, %0|%0, %y1}",
6660 NULL,
6661 NULL,
6662 NULL,
6663 NULL,
6665 "fcom%z2\t%y2\n\tfnstsw\t%0",
6666 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6667 "fucom%z2\t%y2\n\tfnstsw\t%0",
6668 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6670 "ficom%z2\t%y2\n\tfnstsw\t%0",
6671 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6672 NULL,
6673 NULL
6676 int mask;
6677 const char *ret;
6679 mask = eflags_p << 3;
6680 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6681 mask |= unordered_p << 1;
6682 mask |= stack_top_dies;
6684 if (mask >= 24)
6685 abort ();
6686 ret = alt[mask];
6687 if (ret == NULL)
6688 abort ();
6690 return ret;
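/* Example (added; not part of the original sources): decoding the mask
   above for a popping fcomi-style unordered compare:

       eflags_p = 1, integer operand = 0, unordered_p = 1, top dies = 1
       mask     = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11
       alt[11]  = "fucomip\t{%y1, %0|%0, %y1}"
   */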
6694 void
6695 ix86_output_addr_vec_elt (file, value)
6696 FILE *file;
6697 int value;
6699 const char *directive = ASM_LONG;
6701 if (TARGET_64BIT)
6703 #ifdef ASM_QUAD
6704 directive = ASM_QUAD;
6705 #else
6706 abort ();
6707 #endif
6710 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6713 void
6714 ix86_output_addr_diff_elt (file, value, rel)
6715 FILE *file;
6716 int value, rel;
6718 if (TARGET_64BIT)
6719 fprintf (file, "%s%s%d-%s%d\n",
6720 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6721 else if (HAVE_AS_GOTOFF_IN_DATA)
6722 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6723 else
6724 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6725 ASM_LONG, LPREFIX, value);
6728 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6729 for the target. */
6731 void
6732 ix86_expand_clear (dest)
6733 rtx dest;
6735 rtx tmp;
6737 /* We play register width games, which are only valid after reload. */
6738 if (!reload_completed)
6739 abort ();
6741 /* Avoid HImode and its attendant prefix byte. */
6742 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6743 dest = gen_rtx_REG (SImode, REGNO (dest));
6745 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6747 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6748 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6750 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6751 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6754 emit_insn (tmp);
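/* Example (added; not part of the original sources): clearing %eax
   expands to

       xorl  %eax, %eax        # 2 bytes, clobbers the flags

   on most tunings, or to "movl $0, %eax" (5 bytes, flags preserved)
   when TARGET_USE_MOV0 is set and we are not optimizing for size.  */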
6757 void
6758 ix86_expand_move (mode, operands)
6759 enum machine_mode mode;
6760 rtx operands[];
6762 int strict = (reload_in_progress || reload_completed);
6763 rtx insn;
6765 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6767 /* Emit insns to move operands[1] into operands[0]. */
6769 if (GET_CODE (operands[0]) == MEM)
6770 operands[1] = force_reg (Pmode, operands[1]);
6771 else
6773 rtx temp = operands[0];
6774 if (GET_CODE (temp) != REG)
6775 temp = gen_reg_rtx (Pmode);
6776 temp = legitimize_pic_address (operands[1], temp);
6777 if (temp == operands[0])
6778 return;
6779 operands[1] = temp;
6782 else
6784 if (GET_CODE (operands[0]) == MEM
6785 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6786 || !push_operand (operands[0], mode))
6787 && GET_CODE (operands[1]) == MEM)
6788 operands[1] = force_reg (mode, operands[1]);
6790 if (push_operand (operands[0], mode)
6791 && ! general_no_elim_operand (operands[1], mode))
6792 operands[1] = copy_to_mode_reg (mode, operands[1]);
6794 /* Force large constants in 64bit compilation into a register
6795 so that they get CSEed. */
6796 if (TARGET_64BIT && mode == DImode
6797 && immediate_operand (operands[1], mode)
6798 && !x86_64_zero_extended_value (operands[1])
6799 && !register_operand (operands[0], mode)
6800 && optimize && !reload_completed && !reload_in_progress)
6801 operands[1] = copy_to_mode_reg (mode, operands[1]);
6803 if (FLOAT_MODE_P (mode))
6805 /* If we are loading a floating point constant to a register,
6806 force the value to memory now, since we'll get better code
6807 out the back end. */
6809 if (strict)
6811 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6812 && register_operand (operands[0], mode))
6813 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6817 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6819 emit_insn (insn);
6822 void
6823 ix86_expand_vector_move (mode, operands)
6824 enum machine_mode mode;
6825 rtx operands[];
6827 /* Force constants other than zero into memory. We do not know how
6828 the instructions used to build constants modify the upper 64 bits
6829 of the register; once we have that information, we may be able
6830 to handle some of them more efficiently. */
6831 if ((reload_in_progress | reload_completed) == 0
6832 && register_operand (operands[0], mode)
6833 && CONSTANT_P (operands[1]))
6835 rtx addr = gen_reg_rtx (Pmode);
6836 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6837 operands[1] = gen_rtx_MEM (mode, addr);
6840 /* Make operand1 a register if it isn't already. */
6841 if ((reload_in_progress | reload_completed) == 0
6842 && !register_operand (operands[0], mode)
6843 && !register_operand (operands[1], mode)
6844 && operands[1] != CONST0_RTX (mode))
6846 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6847 emit_move_insn (operands[0], temp);
6848 return;
6851 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6854 /* Attempt to expand a binary operator. Make the expansion closer to the
6855 actual machine, than just general_operand, which will allow 3 separate
6856 memory references (one output, two input) in a single insn. */
6858 void
6859 ix86_expand_binary_operator (code, mode, operands)
6860 enum rtx_code code;
6861 enum machine_mode mode;
6862 rtx operands[];
6864 int matching_memory;
6865 rtx src1, src2, dst, op, clob;
6867 dst = operands[0];
6868 src1 = operands[1];
6869 src2 = operands[2];
6871 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6872 if (GET_RTX_CLASS (code) == 'c'
6873 && (rtx_equal_p (dst, src2)
6874 || immediate_operand (src1, mode)))
6876 rtx temp = src1;
6877 src1 = src2;
6878 src2 = temp;
6881 /* If the destination is memory, and we do not have matching source
6882 operands, do things in registers. */
6883 matching_memory = 0;
6884 if (GET_CODE (dst) == MEM)
6886 if (rtx_equal_p (dst, src1))
6887 matching_memory = 1;
6888 else if (GET_RTX_CLASS (code) == 'c'
6889 && rtx_equal_p (dst, src2))
6890 matching_memory = 2;
6891 else
6892 dst = gen_reg_rtx (mode);
6895 /* Both source operands cannot be in memory. */
6896 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6898 if (matching_memory != 2)
6899 src2 = force_reg (mode, src2);
6900 else
6901 src1 = force_reg (mode, src1);
6904 /* If the operation is not commutable, source 1 cannot be a constant
6905 or non-matching memory. */
6906 if ((CONSTANT_P (src1)
6907 || (!matching_memory && GET_CODE (src1) == MEM))
6908 && GET_RTX_CLASS (code) != 'c')
6909 src1 = force_reg (mode, src1);
6911 /* If optimizing, copy to regs to improve CSE */
6912 if (optimize && ! no_new_pseudos)
6914 if (GET_CODE (dst) == MEM)
6915 dst = gen_reg_rtx (mode);
6916 if (GET_CODE (src1) == MEM)
6917 src1 = force_reg (mode, src1);
6918 if (GET_CODE (src2) == MEM)
6919 src2 = force_reg (mode, src2);
6922 /* Emit the instruction. */
6924 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6925 if (reload_in_progress)
6927 /* Reload doesn't know about the flags register, and doesn't know that
6928 it doesn't want to clobber it. We can only do this with PLUS. */
6929 if (code != PLUS)
6930 abort ();
6931 emit_insn (op);
6933 else
6935 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6936 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6939 /* Fix up the destination if needed. */
6940 if (dst != operands[0])
6941 emit_move_insn (operands[0], dst);
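/* Usage sketch (added; not part of the original sources): the expanders
   in i386.md typically hand their operands straight to this routine,
   e.g. for an addsi3-style pattern:

       ix86_expand_binary_operator (PLUS, SImode, operands);

   letting the code above massage memory and constant operands into a
   shape a single insn can accept.  */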
6944 /* Return TRUE or FALSE depending on whether the binary operator meets the
6945 appropriate constraints. */
6948 ix86_binary_operator_ok (code, mode, operands)
6949 enum rtx_code code;
6950 enum machine_mode mode ATTRIBUTE_UNUSED;
6951 rtx operands[3];
6953 /* Both source operands cannot be in memory. */
6954 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6955 return 0;
6956 /* If the operation is not commutable, source 1 cannot be a constant. */
6957 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6958 return 0;
6959 /* If the destination is memory, we must have a matching source operand. */
6960 if (GET_CODE (operands[0]) == MEM
6961 && ! (rtx_equal_p (operands[0], operands[1])
6962 || (GET_RTX_CLASS (code) == 'c'
6963 && rtx_equal_p (operands[0], operands[2]))))
6964 return 0;
6965 /* If the operation is not commutable and source 1 is memory, we must
6966 have a matching destination. */
6967 if (GET_CODE (operands[1]) == MEM
6968 && GET_RTX_CLASS (code) != 'c'
6969 && ! rtx_equal_p (operands[0], operands[1]))
6970 return 0;
6971 return 1;
6974 /* Attempt to expand a unary operator. Make the expansion closer to the
6975 actual machine, than just general_operand, which will allow 2 separate
6976 memory references (one output, one input) in a single insn. */
6978 void
6979 ix86_expand_unary_operator (code, mode, operands)
6980 enum rtx_code code;
6981 enum machine_mode mode;
6982 rtx operands[];
6984 int matching_memory;
6985 rtx src, dst, op, clob;
6987 dst = operands[0];
6988 src = operands[1];
6990 /* If the destination is memory, and we do not have matching source
6991 operands, do things in registers. */
6992 matching_memory = 0;
6993 if (GET_CODE (dst) == MEM)
6995 if (rtx_equal_p (dst, src))
6996 matching_memory = 1;
6997 else
6998 dst = gen_reg_rtx (mode);
7001 /* When the source operand is memory, the destination must match. */
7002 if (!matching_memory && GET_CODE (src) == MEM)
7003 src = force_reg (mode, src);
7005 /* If optimizing, copy to regs to improve CSE */
7006 if (optimize && ! no_new_pseudos)
7008 if (GET_CODE (dst) == MEM)
7009 dst = gen_reg_rtx (mode);
7010 if (GET_CODE (src) == MEM)
7011 src = force_reg (mode, src);
7014 /* Emit the instruction. */
7016 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7017 if (reload_in_progress || code == NOT)
7019 /* Reload doesn't know about the flags register, and doesn't know that
7020 it doesn't want to clobber it. */
7021 if (code != NOT)
7022 abort ();
7023 emit_insn (op);
7025 else
7027 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7028 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7031 /* Fix up the destination if needed. */
7032 if (dst != operands[0])
7033 emit_move_insn (operands[0], dst);
7036 /* Return TRUE or FALSE depending on whether the unary operator meets the
7037 appropriate constraints. */
7040 ix86_unary_operator_ok (code, mode, operands)
7041 enum rtx_code code ATTRIBUTE_UNUSED;
7042 enum machine_mode mode ATTRIBUTE_UNUSED;
7043 rtx operands[2] ATTRIBUTE_UNUSED;
7045 /* If one of operands is memory, source and destination must match. */
7046 if ((GET_CODE (operands[0]) == MEM
7047 || GET_CODE (operands[1]) == MEM)
7048 && ! rtx_equal_p (operands[0], operands[1]))
7049 return FALSE;
7050 return TRUE;
7053 /* Return TRUE or FALSE depending on whether the first SET in INSN
7054 has source and destination with matching CC modes, and whether the
7055 CC mode is at least as constrained as REQ_MODE. */
7058 ix86_match_ccmode (insn, req_mode)
7059 rtx insn;
7060 enum machine_mode req_mode;
7062 rtx set;
7063 enum machine_mode set_mode;
7065 set = PATTERN (insn);
7066 if (GET_CODE (set) == PARALLEL)
7067 set = XVECEXP (set, 0, 0);
7068 if (GET_CODE (set) != SET)
7069 abort ();
7070 if (GET_CODE (SET_SRC (set)) != COMPARE)
7071 abort ();
7073 set_mode = GET_MODE (SET_DEST (set));
7074 switch (set_mode)
7076 case CCNOmode:
7077 if (req_mode != CCNOmode
7078 && (req_mode != CCmode
7079 || XEXP (SET_SRC (set), 1) != const0_rtx))
7080 return 0;
7081 break;
7082 case CCmode:
7083 if (req_mode == CCGCmode)
7084 return 0;
7085 /* FALLTHRU */
7086 case CCGCmode:
7087 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7088 return 0;
7089 /* FALLTHRU */
7090 case CCGOCmode:
7091 if (req_mode == CCZmode)
7092 return 0;
7093 /* FALLTHRU */
7094 case CCZmode:
7095 break;
7097 default:
7098 abort ();
7101 return (GET_MODE (SET_SRC (set)) == set_mode);
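/* Example (added; not part of the original sources): the fallthrough
   chain above implements "at least as constrained as", with

       CCZmode < CCGOCmode < CCGCmode < CCmode

   so an insn whose compare sets CCZmode matches any request, while one
   that sets plain CCmode matches only a CCmode request.  */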
7104 /* Generate insn patterns to do an integer compare of OPERANDS. */
7106 static rtx
7107 ix86_expand_int_compare (code, op0, op1)
7108 enum rtx_code code;
7109 rtx op0, op1;
7111 enum machine_mode cmpmode;
7112 rtx tmp, flags;
7114 cmpmode = SELECT_CC_MODE (code, op0, op1);
7115 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7117 /* This is very simple, but making the interface the same as in the
7118 FP case makes the rest of the code easier. */
7119 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7120 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7122 /* Return the test that should be put into the flags user, i.e.
7123 the bcc, scc, or cmov instruction. */
7124 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7127 /* Figure out whether to use ordered or unordered fp comparisons.
7128 Return the appropriate mode to use. */
7130 enum machine_mode
7131 ix86_fp_compare_mode (code)
7132 enum rtx_code code ATTRIBUTE_UNUSED;
7134 /* ??? In order to make all comparisons reversible, we do all comparisons
7135 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7136 all forms of trapping and nontrapping comparisons, we can make inequality
7137 comparisons trapping again, since it results in better code when using
7138 FCOM based compares. */
7139 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7142 enum machine_mode
7143 ix86_cc_mode (code, op0, op1)
7144 enum rtx_code code;
7145 rtx op0, op1;
7147 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7148 return ix86_fp_compare_mode (code);
7149 switch (code)
7151 /* Only zero flag is needed. */
7152 case EQ: /* ZF=0 */
7153 case NE: /* ZF!=0 */
7154 return CCZmode;
7155 /* Codes needing carry flag. */
7156 case GEU: /* CF=0 */
7157 case GTU: /* CF=0 & ZF=0 */
7158 case LTU: /* CF=1 */
7159 case LEU: /* CF=1 | ZF=1 */
7160 return CCmode;
7161 /* Codes possibly doable only with sign flag when
7162 comparing against zero. */
7163 case GE: /* SF=OF or SF=0 */
7164 case LT: /* SF<>OF or SF=1 */
7165 if (op1 == const0_rtx)
7166 return CCGOCmode;
7167 else
7168 /* For other cases Carry flag is not required. */
7169 return CCGCmode;
7170 /* Codes doable only with the sign flag when comparing
7171 against zero, but we miss the jump instruction for it,
7172 so we need to use relational tests against overflow,
7173 which thus needs to be zero. */
7174 case GT: /* ZF=0 & SF=OF */
7175 case LE: /* ZF=1 | SF<>OF */
7176 if (op1 == const0_rtx)
7177 return CCNOmode;
7178 else
7179 return CCGCmode;
7180 /* The strcmp pattern does (use flags), and combine may ask us for the
7181 proper mode. */
7182 case USE:
7183 return CCmode;
7184 default:
7185 abort ();
7189 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7192 ix86_use_fcomi_compare (code)
7193 enum rtx_code code ATTRIBUTE_UNUSED;
7195 enum rtx_code swapped_code = swap_condition (code);
7196 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7197 || (ix86_fp_comparison_cost (swapped_code)
7198 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7201 /* Swap, force into registers, or otherwise massage the two operands
7202 to a fp comparison. The operands are updated in place; the new
7203 comparison code is returned. */
7205 static enum rtx_code
7206 ix86_prepare_fp_compare_args (code, pop0, pop1)
7207 enum rtx_code code;
7208 rtx *pop0, *pop1;
7210 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7211 rtx op0 = *pop0, op1 = *pop1;
7212 enum machine_mode op_mode = GET_MODE (op0);
7213 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7215 /* All of the unordered compare instructions only work on registers.
7216 The same is true of the XFmode compare instructions. The same is
7217 true of the fcomi compare instructions. */
7219 if (!is_sse
7220 && (fpcmp_mode == CCFPUmode
7221 || op_mode == XFmode
7222 || op_mode == TFmode
7223 || ix86_use_fcomi_compare (code)))
7225 op0 = force_reg (op_mode, op0);
7226 op1 = force_reg (op_mode, op1);
7228 else
7230 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7231 things around if they appear profitable, otherwise force op0
7232 into a register. */
7234 if (standard_80387_constant_p (op0) == 0
7235 || (GET_CODE (op0) == MEM
7236 && ! (standard_80387_constant_p (op1) == 0
7237 || GET_CODE (op1) == MEM)))
7239 rtx tmp;
7240 tmp = op0, op0 = op1, op1 = tmp;
7241 code = swap_condition (code);
7244 if (GET_CODE (op0) != REG)
7245 op0 = force_reg (op_mode, op0);
7247 if (CONSTANT_P (op1))
7249 if (standard_80387_constant_p (op1))
7250 op1 = force_reg (op_mode, op1);
7251 else
7252 op1 = validize_mem (force_const_mem (op_mode, op1));
7256 /* Try to rearrange the comparison to make it cheaper. */
7257 if (ix86_fp_comparison_cost (code)
7258 > ix86_fp_comparison_cost (swap_condition (code))
7259 && (GET_CODE (op1) == REG || !no_new_pseudos))
7261 rtx tmp;
7262 tmp = op0, op0 = op1, op1 = tmp;
7263 code = swap_condition (code);
7264 if (GET_CODE (op0) != REG)
7265 op0 = force_reg (op_mode, op0);
7268 *pop0 = op0;
7269 *pop1 = op1;
7270 return code;
7273 /* Convert the comparison codes we use to represent FP comparisons to the
7274 integer code that will result in a proper branch. Return UNKNOWN if no
7275 such code is available. */
7276 static enum rtx_code
7277 ix86_fp_compare_code_to_integer (code)
7278 enum rtx_code code;
7280 switch (code)
7282 case GT:
7283 return GTU;
7284 case GE:
7285 return GEU;
7286 case ORDERED:
7287 case UNORDERED:
7288 return code;
7289 break;
7290 case UNEQ:
7291 return EQ;
7292 break;
7293 case UNLT:
7294 return LTU;
7295 break;
7296 case UNLE:
7297 return LEU;
7298 break;
7299 case LTGT:
7300 return NE;
7301 break;
7302 default:
7303 return UNKNOWN;
7307 /* Split comparison code CODE into comparisons we can do using branch
7308 instructions. BYPASS_CODE is the comparison code for a branch that will
7309 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7310 is not required, its value is set to NIL.
7311 We never require more than two branches. */
7312 static void
7313 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7314 enum rtx_code code, *bypass_code, *first_code, *second_code;
7316 *first_code = code;
7317 *bypass_code = NIL;
7318 *second_code = NIL;
7320 /* The fcomi comparison sets flags as follows:
7322 cmp ZF PF CF
7323 > 0 0 0
7324 < 0 0 1
7325 = 1 0 0
7326 un 1 1 1 */
7328 switch (code)
7330 case GT: /* GTU - CF=0 & ZF=0 */
7331 case GE: /* GEU - CF=0 */
7332 case ORDERED: /* PF=0 */
7333 case UNORDERED: /* PF=1 */
7334 case UNEQ: /* EQ - ZF=1 */
7335 case UNLT: /* LTU - CF=1 */
7336 case UNLE: /* LEU - CF=1 | ZF=1 */
7337 case LTGT: /* EQ - ZF=0 */
7338 break;
7339 case LT: /* LTU - CF=1 - fails on unordered */
7340 *first_code = UNLT;
7341 *bypass_code = UNORDERED;
7342 break;
7343 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7344 *first_code = UNLE;
7345 *bypass_code = UNORDERED;
7346 break;
7347 case EQ: /* EQ - ZF=1 - fails on unordered */
7348 *first_code = UNEQ;
7349 *bypass_code = UNORDERED;
7350 break;
7351 case NE: /* NE - ZF=0 - fails on unordered */
7352 *first_code = LTGT;
7353 *second_code = UNORDERED;
7354 break;
7355 case UNGE: /* GEU - CF=0 - fails on unordered */
7356 *first_code = GE;
7357 *second_code = UNORDERED;
7358 break;
7359 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7360 *first_code = GT;
7361 *second_code = UNORDERED;
7362 break;
7363 default:
7364 abort ();
7366 if (!TARGET_IEEE_FP)
7368 *second_code = NIL;
7369 *bypass_code = NIL;
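/* Example (added; not part of the original sources): under
   TARGET_IEEE_FP an EQ test splits as first_code = UNEQ with
   bypass_code = UNORDERED, so a branch on fp equality comes out as

       fucomip %st(1), %st
       jp      .Lskip          # bypass: unordered must not take UNEQ
       je      .Ltarget
   .Lskip:
   */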
7373 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7374 All following functions use the number of instructions as a cost metric.
7375 In the future this should be tweaked to compute bytes for optimize_size and
7376 take into account the performance of various instructions on various CPUs. */
7377 static int
7378 ix86_fp_comparison_arithmetics_cost (code)
7379 enum rtx_code code;
7381 if (!TARGET_IEEE_FP)
7382 return 4;
7383 /* The cost of code output by ix86_expand_fp_compare. */
7384 switch (code)
7386 case UNLE:
7387 case UNLT:
7388 case LTGT:
7389 case GT:
7390 case GE:
7391 case UNORDERED:
7392 case ORDERED:
7393 case UNEQ:
7394 return 4;
7395 break;
7396 case LT:
7397 case NE:
7398 case EQ:
7399 case UNGE:
7400 return 5;
7401 break;
7402 case LE:
7403 case UNGT:
7404 return 6;
7405 break;
7406 default:
7407 abort ();
7411 /* Return cost of comparison done using fcomi operation.
7412 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7413 static int
7414 ix86_fp_comparison_fcomi_cost (code)
7415 enum rtx_code code;
7417 enum rtx_code bypass_code, first_code, second_code;
7418 /* Return an arbitrarily high cost when the instruction is not supported -
7419 this prevents gcc from using it. */
7420 if (!TARGET_CMOVE)
7421 return 1024;
7422 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7423 return (bypass_code != NIL || second_code != NIL) + 2;
7426 /* Return cost of comparison done using sahf operation.
7427 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7428 static int
7429 ix86_fp_comparison_sahf_cost (code)
7430 enum rtx_code code;
7432 enum rtx_code bypass_code, first_code, second_code;
7433 /* Return an arbitrarily high cost when the instruction is not preferred -
7434 this keeps gcc from using it. */
7435 if (!TARGET_USE_SAHF && !optimize_size)
7436 return 1024;
7437 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7438 return (bypass_code != NIL || second_code != NIL) + 3;
7441 /* Compute cost of the comparison done using any method.
7442 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7443 static int
7444 ix86_fp_comparison_cost (code)
7445 enum rtx_code code;
7447 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7448 int min;
7450 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7451 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7453 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7454 if (min > sahf_cost)
7455 min = sahf_cost;
7456 if (min > fcomi_cost)
7457 min = fcomi_cost;
7458 return min;
7461 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7463 static rtx
7464 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7465 enum rtx_code code;
7466 rtx op0, op1, scratch;
7467 rtx *second_test;
7468 rtx *bypass_test;
7470 enum machine_mode fpcmp_mode, intcmp_mode;
7471 rtx tmp, tmp2;
7472 int cost = ix86_fp_comparison_cost (code);
7473 enum rtx_code bypass_code, first_code, second_code;
7475 fpcmp_mode = ix86_fp_compare_mode (code);
7476 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7478 if (second_test)
7479 *second_test = NULL_RTX;
7480 if (bypass_test)
7481 *bypass_test = NULL_RTX;
7483 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7485 /* Do fcomi/sahf based test when profitable. */
7486 if ((bypass_code == NIL || bypass_test)
7487 && (second_code == NIL || second_test)
7488 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7490 if (TARGET_CMOVE)
7492 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7493 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7494 tmp);
7495 emit_insn (tmp);
7497 else
7499 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7500 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
7501 if (!scratch)
7502 scratch = gen_reg_rtx (HImode);
7503 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7504 emit_insn (gen_x86_sahf_1 (scratch));
7507 /* The FP codes work out to act like unsigned. */
7508 intcmp_mode = fpcmp_mode;
7509 code = first_code;
7510 if (bypass_code != NIL)
7511 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7512 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7513 const0_rtx);
7514 if (second_code != NIL)
7515 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7516 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7517 const0_rtx);
7519 else
7521 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7522 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7523 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
7524 if (!scratch)
7525 scratch = gen_reg_rtx (HImode);
7526 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7528 /* In the unordered case, we have to check C2 for NaNs, which
7529 doesn't happen to work out to anything nice combination-wise.
7530 So do some bit twiddling on the value we've got in AH to come
7531 up with an appropriate set of condition codes. */
7533 intcmp_mode = CCNOmode;
7534 switch (code)
7536 case GT:
7537 case UNGT:
7538 if (code == GT || !TARGET_IEEE_FP)
7540 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7541 code = EQ;
7543 else
7545 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7546 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7547 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7548 intcmp_mode = CCmode;
7549 code = GEU;
7551 break;
7552 case LT:
7553 case UNLT:
7554 if (code == LT && TARGET_IEEE_FP)
7556 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7557 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7558 intcmp_mode = CCmode;
7559 code = EQ;
7561 else
7563 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7564 code = NE;
7566 break;
7567 case GE:
7568 case UNGE:
7569 if (code == GE || !TARGET_IEEE_FP)
7571 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7572 code = EQ;
7574 else
7576 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7577 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7578 GEN_INT (0x01)));
7579 code = NE;
7581 break;
7582 case LE:
7583 case UNLE:
7584 if (code == LE && TARGET_IEEE_FP)
7586 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7587 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7588 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7589 intcmp_mode = CCmode;
7590 code = LTU;
7592 else
7594 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7595 code = NE;
7597 break;
7598 case EQ:
7599 case UNEQ:
7600 if (code == EQ && TARGET_IEEE_FP)
7602 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7603 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7604 intcmp_mode = CCmode;
7605 code = EQ;
7607 else
7609 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7610 code = NE;
7611 break;
7613 break;
7614 case NE:
7615 case LTGT:
7616 if (code == NE && TARGET_IEEE_FP)
7618 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7619 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7620 GEN_INT (0x40)));
7621 code = NE;
7623 else
7625 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7626 code = EQ;
7628 break;
7630 case UNORDERED:
7631 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7632 code = NE;
7633 break;
7634 case ORDERED:
7635 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7636 code = EQ;
7637 break;
7639 default:
7640 abort ();
7644 /* Return the test that should be put into the flags user, i.e.
7645 the bcc, scc, or cmov instruction. */
7646 return gen_rtx_fmt_ee (code, VOIDmode,
7647 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7648 const0_rtx);
7652 ix86_expand_compare (code, second_test, bypass_test)
7653 enum rtx_code code;
7654 rtx *second_test, *bypass_test;
7656 rtx op0, op1, ret;
7657 op0 = ix86_compare_op0;
7658 op1 = ix86_compare_op1;
7660 if (second_test)
7661 *second_test = NULL_RTX;
7662 if (bypass_test)
7663 *bypass_test = NULL_RTX;
7665 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7666 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7667 second_test, bypass_test);
7668 else
7669 ret = ix86_expand_int_compare (code, op0, op1);
7671 return ret;
7674 /* Return true if the CODE will result in a nontrivial jump sequence. */
7675 bool
7676 ix86_fp_jump_nontrivial_p (code)
7677 enum rtx_code code;
7679 enum rtx_code bypass_code, first_code, second_code;
7680 if (!TARGET_CMOVE)
7681 return true;
7682 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7683 return bypass_code != NIL || second_code != NIL;
7686 void
7687 ix86_expand_branch (code, label)
7688 enum rtx_code code;
7689 rtx label;
7691 rtx tmp;
7693 switch (GET_MODE (ix86_compare_op0))
7695 case QImode:
7696 case HImode:
7697 case SImode:
7698 simple:
7699 tmp = ix86_expand_compare (code, NULL, NULL);
7700 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7701 gen_rtx_LABEL_REF (VOIDmode, label),
7702 pc_rtx);
7703 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7704 return;
7706 case SFmode:
7707 case DFmode:
7708 case XFmode:
7709 case TFmode:
7711 rtvec vec;
7712 int use_fcomi;
7713 enum rtx_code bypass_code, first_code, second_code;
7715 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7716 &ix86_compare_op1);
7718 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7720 /* Check whether we will use the natural sequence with one jump. If
7721 so, we can expand the jump early. Otherwise delay expansion by
7722 creating a compound insn so as not to confuse the optimizers. */
7723 if (bypass_code == NIL && second_code == NIL
7724 && TARGET_CMOVE)
7726 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7727 gen_rtx_LABEL_REF (VOIDmode, label),
7728 pc_rtx, NULL_RTX);
7730 else
7732 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7733 ix86_compare_op0, ix86_compare_op1);
7734 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7735 gen_rtx_LABEL_REF (VOIDmode, label),
7736 pc_rtx);
7737 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7739 use_fcomi = ix86_use_fcomi_compare (code);
7740 vec = rtvec_alloc (3 + !use_fcomi);
7741 RTVEC_ELT (vec, 0) = tmp;
7742 RTVEC_ELT (vec, 1)
7743 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7744 RTVEC_ELT (vec, 2)
7745 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7746 if (! use_fcomi)
7747 RTVEC_ELT (vec, 3)
7748 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7750 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7752 return;
7755 case DImode:
7756 if (TARGET_64BIT)
7757 goto simple;
7758 /* Expand DImode branch into multiple compare+branch. */
7760 rtx lo[2], hi[2], label2;
7761 enum rtx_code code1, code2, code3;
7763 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7765 tmp = ix86_compare_op0;
7766 ix86_compare_op0 = ix86_compare_op1;
7767 ix86_compare_op1 = tmp;
7768 code = swap_condition (code);
7770 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7771 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7773 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7774 avoid two branches. This costs one extra insn, so disable when
7775 optimizing for size. */
7777 if ((code == EQ || code == NE)
7778 && (!optimize_size
7779 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7781 rtx xor0, xor1;
7783 xor1 = hi[0];
7784 if (hi[1] != const0_rtx)
7785 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7786 NULL_RTX, 0, OPTAB_WIDEN);
7788 xor0 = lo[0];
7789 if (lo[1] != const0_rtx)
7790 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7791 NULL_RTX, 0, OPTAB_WIDEN);
7793 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7794 NULL_RTX, 0, OPTAB_WIDEN);
7796 ix86_compare_op0 = tmp;
7797 ix86_compare_op1 = const0_rtx;
7798 ix86_expand_branch (code, label);
7799 return;
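/* Illustration (added; not part of the original sources): for a DImode
   equality test the xor/or trick above emits, in effect,

       xorl  hi(b), hi(a)
       xorl  lo(b), lo(a)
       orl   lo(a), hi(a)
       je    .Ltrue            # all 64 bits tested with one branch
   */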
7802 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7803 op1 is a constant, and the low word is zero, then we can just
7804 examine the high word. */
7806 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7807 switch (code)
7809 case LT: case LTU: case GE: case GEU:
7810 ix86_compare_op0 = hi[0];
7811 ix86_compare_op1 = hi[1];
7812 ix86_expand_branch (code, label);
7813 return;
7814 default:
7815 break;
7818 /* Otherwise, we need two or three jumps. */
7820 label2 = gen_label_rtx ();
7822 code1 = code;
7823 code2 = swap_condition (code);
7824 code3 = unsigned_condition (code);
7826 switch (code)
7828 case LT: case GT: case LTU: case GTU:
7829 break;
7831 case LE: code1 = LT; code2 = GT; break;
7832 case GE: code1 = GT; code2 = LT; break;
7833 case LEU: code1 = LTU; code2 = GTU; break;
7834 case GEU: code1 = GTU; code2 = LTU; break;
7836 case EQ: code1 = NIL; code2 = NE; break;
7837 case NE: code2 = NIL; break;
7839 default:
7840 abort ();
7844 * a < b =>
7845 * if (hi(a) < hi(b)) goto true;
7846 * if (hi(a) > hi(b)) goto false;
7847 * if (lo(a) < lo(b)) goto true;
7848 * false:
7851 ix86_compare_op0 = hi[0];
7852 ix86_compare_op1 = hi[1];
7854 if (code1 != NIL)
7855 ix86_expand_branch (code1, label);
7856 if (code2 != NIL)
7857 ix86_expand_branch (code2, label2);
7859 ix86_compare_op0 = lo[0];
7860 ix86_compare_op1 = lo[1];
7861 ix86_expand_branch (code3, label);
7863 if (code2 != NIL)
7864 emit_label (label2);
7865 return;
7868 default:
7869 abort ();
7873 /* Split branch based on floating point condition. */
7874 void
7875 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7876 enum rtx_code code;
7877 rtx op1, op2, target1, target2, tmp;
7879 rtx second, bypass;
7880 rtx label = NULL_RTX;
7881 rtx condition;
7882 int bypass_probability = -1, second_probability = -1, probability = -1;
7883 rtx i;
7885 if (target2 != pc_rtx)
7887 rtx tmp = target2;
7888 code = reverse_condition_maybe_unordered (code);
7889 target2 = target1;
7890 target1 = tmp;
7893 condition = ix86_expand_fp_compare (code, op1, op2,
7894 tmp, &second, &bypass);
7896 if (split_branch_probability >= 0)
7898 /* Distribute the probabilities across the jumps.
7899 Assume that BYPASS and SECOND always test
7900 for UNORDERED. */
7901 probability = split_branch_probability;
7903 /* A value of 1 is low enough that there is no need for the probability
7904 to be updated. Later we may run some experiments and see
7905 if unordered values are more frequent in practice. */
7906 if (bypass)
7907 bypass_probability = 1;
7908 if (second)
7909 second_probability = 1;
7911 if (bypass != NULL_RTX)
7913 label = gen_label_rtx ();
7914 i = emit_jump_insn (gen_rtx_SET
7915 (VOIDmode, pc_rtx,
7916 gen_rtx_IF_THEN_ELSE (VOIDmode,
7917 bypass,
7918 gen_rtx_LABEL_REF (VOIDmode,
7919 label),
7920 pc_rtx)));
7921 if (bypass_probability >= 0)
7922 REG_NOTES (i)
7923 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7924 GEN_INT (bypass_probability),
7925 REG_NOTES (i));
7927 i = emit_jump_insn (gen_rtx_SET
7928 (VOIDmode, pc_rtx,
7929 gen_rtx_IF_THEN_ELSE (VOIDmode,
7930 condition, target1, target2)));
7931 if (probability >= 0)
7932 REG_NOTES (i)
7933 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7934 GEN_INT (probability),
7935 REG_NOTES (i));
7936 if (second != NULL_RTX)
7938 i = emit_jump_insn (gen_rtx_SET
7939 (VOIDmode, pc_rtx,
7940 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7941 target2)));
7942 if (second_probability >= 0)
7943 REG_NOTES (i)
7944 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7945 GEN_INT (second_probability),
7946 REG_NOTES (i));
7948 if (label != NULL_RTX)
7949 emit_label (label);
7953 ix86_expand_setcc (code, dest)
7954 enum rtx_code code;
7955 rtx dest;
7957 rtx ret, tmp, tmpreg;
7958 rtx second_test, bypass_test;
7960 if (GET_MODE (ix86_compare_op0) == DImode
7961 && !TARGET_64BIT)
7962 return 0; /* FAIL */
7964 if (GET_MODE (dest) != QImode)
7965 abort ();
7967 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7968 PUT_MODE (ret, QImode);
7970 tmp = dest;
7971 tmpreg = dest;
7973 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7974 if (bypass_test || second_test)
7976 rtx test = second_test;
7977 int bypass = 0;
7978 rtx tmp2 = gen_reg_rtx (QImode);
7979 if (bypass_test)
7981 if (second_test)
7982 abort ();
7983 test = bypass_test;
7984 bypass = 1;
7985 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7987 PUT_MODE (test, QImode);
7988 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7990 if (bypass)
7991 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7992 else
7993 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7996 return 1; /* DONE */
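/* Example (added; not part of the original sources): a setcc of UNGE on
   floats under IEEE math needs the second (UNORDERED) test, giving
   roughly

       fucomip %st(1), %st
       setae   %al             # the GE part
       setp    %cl             # the UNORDERED part
       orb     %cl, %al        # UNGE = GE | UNORDERED
   */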
8000 ix86_expand_int_movcc (operands)
8001 rtx operands[];
8003 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8004 rtx compare_seq, compare_op;
8005 rtx second_test, bypass_test;
8006 enum machine_mode mode = GET_MODE (operands[0]);
8008 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8009 In case the comparison is done with an immediate, we can convert it to
8010 LTU or GEU by altering the integer. */
8012 if ((code == LEU || code == GTU)
8013 && GET_CODE (ix86_compare_op1) == CONST_INT
8014 && mode != HImode
8015 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
8016 /* The operand must still be representable as a sign-extended value. */
8017 && (!TARGET_64BIT
8018 || GET_MODE (ix86_compare_op0) != DImode
8019 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
8020 && GET_CODE (operands[2]) == CONST_INT
8021 && GET_CODE (operands[3]) == CONST_INT)
8023 if (code == LEU)
8024 code = LTU;
8025 else
8026 code = GEU;
8027 ix86_compare_op1
8028 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8029 GET_MODE (ix86_compare_op0));
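/* For instance, the unsigned test x <= 5 (LEU) becomes x < 6 (LTU),
   and x > 5 (GTU) becomes x >= 6 (GEU); the checks above rule out
   the values where adding 1 to the immediate would wrap or stop
   being representable as a sign-extended 32-bit value.  */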
8032 start_sequence ();
8033 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8034 compare_seq = gen_sequence ();
8035 end_sequence ();
8037 compare_code = GET_CODE (compare_op);
8039 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8040 HImode insns, we'd be swallowed in word prefix ops. */
8042 if (mode != HImode
8043 && (mode != DImode || TARGET_64BIT)
8044 && GET_CODE (operands[2]) == CONST_INT
8045 && GET_CODE (operands[3]) == CONST_INT)
8047 rtx out = operands[0];
8048 HOST_WIDE_INT ct = INTVAL (operands[2]);
8049 HOST_WIDE_INT cf = INTVAL (operands[3]);
8050 HOST_WIDE_INT diff;
8052 if ((compare_code == LTU || compare_code == GEU)
8053 && !second_test && !bypass_test)
8056 /* Detect overlap between destination and compare sources. */
8057 rtx tmp = out;
8059 /* To simplify the rest of the code, restrict to the GEU case. */
8060 if (compare_code == LTU)
8062 int tmp = ct;
8063 ct = cf;
8064 cf = tmp;
8065 compare_code = reverse_condition (compare_code);
8066 code = reverse_condition (code);
8068 diff = ct - cf;
8070 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8071 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8072 tmp = gen_reg_rtx (mode);
8074 emit_insn (compare_seq);
8075 if (mode == DImode)
8076 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8077 else
8078 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8080 if (diff == 1)
8083 * cmpl op0,op1
8084 * sbbl dest,dest
8085 * [addl dest, ct]
8087 * Size 5 - 8.
8089 if (ct)
8090 tmp = expand_simple_binop (mode, PLUS,
8091 tmp, GEN_INT (ct),
8092 tmp, 1, OPTAB_DIRECT);
8094 else if (cf == -1)
8097 * cmpl op0,op1
8098 * sbbl dest,dest
8099 * orl $ct, dest
8101 * Size 8.
8103 tmp = expand_simple_binop (mode, IOR,
8104 tmp, GEN_INT (ct),
8105 tmp, 1, OPTAB_DIRECT);
8107 else if (diff == -1 && ct)
8110 * cmpl op0,op1
8111 * sbbl dest,dest
8112 * xorl $-1, dest
8113 * [addl dest, cf]
8115 * Size 8 - 11.
8117 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8118 if (cf)
8119 tmp = expand_simple_binop (mode, PLUS,
8120 tmp, GEN_INT (cf),
8121 tmp, 1, OPTAB_DIRECT);
8123 else
8126 * cmpl op0,op1
8127 * sbbl dest,dest
8128 * andl cf - ct, dest
8129 * [addl dest, ct]
8131 * Size 8 - 11.
8133 tmp = expand_simple_binop (mode, AND,
8134 tmp,
8135 gen_int_mode (cf - ct, mode),
8136 tmp, 1, OPTAB_DIRECT);
8137 if (ct)
8138 tmp = expand_simple_binop (mode, PLUS,
8139 tmp, GEN_INT (ct),
8140 tmp, 1, OPTAB_DIRECT);
8143 if (tmp != out)
8144 emit_move_insn (out, tmp);
8146 return 1; /* DONE */
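/* Illustration of the sbb sequence above: for the unsigned
   dest = (a >= b) ? 3 : 2, where diff == 1, we emit

      cmpl  b, a          ; CF set iff a < b
      sbbl  %eax, %eax    ; eax = a < b ? -1 : 0
      addl  $3, %eax      ; a < b ? 2 : 3

   and in the general AND case, ((-1 or 0) & (cf - ct)) + ct picks
   cf or ct the same way, all without a branch.  */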
8149 diff = ct - cf;
8150 if (diff < 0)
8152 HOST_WIDE_INT tmp;
8153 tmp = ct, ct = cf, cf = tmp;
8154 diff = -diff;
8155 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8157 /* We may be reversing an unordered compare to a normal compare, which
8158 is not valid in general (we may convert a non-trapping condition
8159 into a trapping one); however, on i386 we currently emit all
8160 comparisons unordered. */
8161 compare_code = reverse_condition_maybe_unordered (compare_code);
8162 code = reverse_condition_maybe_unordered (code);
8164 else
8166 compare_code = reverse_condition (compare_code);
8167 code = reverse_condition (code);
8171 compare_code = NIL;
8172 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8173 && GET_CODE (ix86_compare_op1) == CONST_INT)
8175 if (ix86_compare_op1 == const0_rtx
8176 && (code == LT || code == GE))
8177 compare_code = code;
8178 else if (ix86_compare_op1 == constm1_rtx)
8180 if (code == LE)
8181 compare_code = LT;
8182 else if (code == GT)
8183 compare_code = GE;
8187 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8188 if (compare_code != NIL
8189 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8190 && (cf == -1 || ct == -1))
8192 /* If the lea code below could be used, only optimize
8193 if it results in a 2-insn sequence. */
8195 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8196 || diff == 3 || diff == 5 || diff == 9)
8197 || (compare_code == LT && ct == -1)
8198 || (compare_code == GE && cf == -1))
8201 * notl op1 (if necessary)
8202 * sarl $31, op1
8203 * orl cf, op1
8205 if (ct != -1)
8207 cf = ct;
8208 ct = -1;
8209 code = reverse_condition (code);
8212 out = emit_store_flag (out, code, ix86_compare_op0,
8213 ix86_compare_op1, VOIDmode, 0, -1);
8215 out = expand_simple_binop (mode, IOR,
8216 out, GEN_INT (cf),
8217 out, 1, OPTAB_DIRECT);
8218 if (out != operands[0])
8219 emit_move_insn (operands[0], out);
8221 return 1; /* DONE */
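/* Illustration: dest = (x < 0) ? -1 : 5, with x in %eax, becomes

      sarl  $31, %eax     ; x < 0 ? -1 : 0
      orl   $5, %eax      ; -1 stays -1, 0 becomes 5

   The notl is only needed for the mirrored (x >= 0) ? -1 : cf form.  */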
8225 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8226 || diff == 3 || diff == 5 || diff == 9)
8227 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8230 * xorl dest,dest
8231 * cmpl op1,op2
8232 * setcc dest
8233 * lea cf(dest*(ct-cf)),dest
8235 * Size 14.
8237 * This also catches the degenerate setcc-only case.
8240 rtx tmp;
8241 int nops;
8243 out = emit_store_flag (out, code, ix86_compare_op0,
8244 ix86_compare_op1, VOIDmode, 0, 1);
8246 nops = 0;
8247 /* On x86_64 the lea instruction operates on Pmode, so we need
8248 to do the arithmetic in the proper mode to match. */
8249 if (diff == 1)
8250 tmp = out;
8251 else
8253 rtx out1;
8254 out1 = out;
8255 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8256 nops++;
8257 if (diff & 1)
8259 tmp = gen_rtx_PLUS (mode, tmp, out1);
8260 nops++;
8263 if (cf != 0)
8265 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8266 nops++;
8268 if (tmp != out
8269 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8271 if (nops == 1)
8273 rtx clob;
8275 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8276 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8278 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8279 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8280 emit_insn (tmp);
8282 else
8283 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8285 if (out != operands[0])
8286 emit_move_insn (operands[0], out);
8288 return 1; /* DONE */
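/* Illustration of the setcc + lea path: dest = (a == b) ? 7 : 3 has
   diff == 4, so we emit

      xorl  %eax, %eax
      cmpl  b, a
      sete  %al                  ; eax = 0 or 1
      leal  3(,%eax,4), %eax     ; 3 or 7

   A single lea covers diffs of 2, 4 and 8 (scale only) and 3, 5 and
   9 (scale plus base); diff == 1 degenerates to the setcc followed
   by an optional add of cf.  */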
8292 * General case: Jumpful:
8293 * xorl dest,dest cmpl op1, op2
8294 * cmpl op1, op2 movl ct, dest
8295 * setcc dest jcc 1f
8296 * decl dest movl cf, dest
8297 * andl (cf-ct),dest 1:
8298 * addl ct,dest
8300 * Size 20. Size 14.
8302 * This is reasonably steep, but branch mispredict costs are
8303 * high on modern cpus, so consider failing only if optimizing
8304 * for space.
8306 * %%% Parameterize branch_cost on the tuning architecture, then
8307 * use that. The 80386 couldn't care less about mispredicts.
8310 if (!optimize_size && !TARGET_CMOVE)
8312 if (ct == 0)
8314 ct = cf;
8315 cf = 0;
8316 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8317 /* We may be reversing an unordered compare to a normal compare,
8318 which is not valid in general (we may convert a non-trapping
8319 condition into a trapping one); however, on i386 we currently
8320 emit all comparisons unordered. */
8321 code = reverse_condition_maybe_unordered (code);
8322 else
8324 code = reverse_condition (code);
8325 if (compare_code != NIL)
8326 compare_code = reverse_condition (compare_code);
8330 if (compare_code != NIL)
8332 /* notl op1 (if needed)
8333 sarl $31, op1
8334 andl (cf-ct), op1
8335 addl ct, op1
8337 For x < 0 (resp. x <= -1) there will be no notl,
8338 so if possible swap the constants to get rid of the
8339 complement.
8340 True/false will be -1/0, while the code below (store flag
8341 followed by decrement) produces 0/-1, so the constants need
8342 to be exchanged once more. */
8344 if (compare_code == GE || !cf)
8346 code = reverse_condition (code);
8347 compare_code = LT;
8349 else
8351 HOST_WIDE_INT tmp = cf;
8352 cf = ct;
8353 ct = tmp;
8356 out = emit_store_flag (out, code, ix86_compare_op0,
8357 ix86_compare_op1, VOIDmode, 0, -1);
8359 else
8361 out = emit_store_flag (out, code, ix86_compare_op0,
8362 ix86_compare_op1, VOIDmode, 0, 1);
8364 out = expand_simple_binop (mode, PLUS,
8365 out, constm1_rtx,
8366 out, 1, OPTAB_DIRECT);
8369 out = expand_simple_binop (mode, AND,
8370 out,
8371 gen_int_mode (cf - ct, mode),
8372 out, 1, OPTAB_DIRECT);
8373 out = expand_simple_binop (mode, PLUS,
8374 out, GEN_INT (ct),
8375 out, 1, OPTAB_DIRECT);
8376 if (out != operands[0])
8377 emit_move_insn (operands[0], out);
8379 return 1; /* DONE */
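/* Illustration of the jumpless sequence: dest = (a < b) ? 10 : 20,
   so ct == 10 and cf == 20, becomes

      xorl  %eax, %eax
      cmpl  b, a
      setl  %al           ; a < b ? 1 : 0
      decl  %eax          ; a < b ? 0 : -1
      andl  $10, %eax     ; cf - ct -> 0 or 10
      addl  $10, %eax     ; ct      -> 10 or 20  */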
8383 if (!TARGET_CMOVE)
8385 /* Try a few things more with specific constants and a variable. */
8387 optab op;
8388 rtx var, orig_out, out, tmp;
8390 if (optimize_size)
8391 return 0; /* FAIL */
8393 /* If one of the two operands is an interesting constant, load a
8394 constant with the above and mask it in with a logical operation. */
8396 if (GET_CODE (operands[2]) == CONST_INT)
8398 var = operands[3];
8399 if (INTVAL (operands[2]) == 0)
8400 operands[3] = constm1_rtx, op = and_optab;
8401 else if (INTVAL (operands[2]) == -1)
8402 operands[3] = const0_rtx, op = ior_optab;
8403 else
8404 return 0; /* FAIL */
8406 else if (GET_CODE (operands[3]) == CONST_INT)
8408 var = operands[2];
8409 if (INTVAL (operands[3]) == 0)
8410 operands[2] = constm1_rtx, op = and_optab;
8411 else if (INTVAL (operands[3]) == -1)
8412 operands[2] = const0_rtx, op = ior_optab;
8413 else
8414 return 0; /* FAIL */
8416 else
8417 return 0; /* FAIL */
8419 orig_out = operands[0];
8420 tmp = gen_reg_rtx (mode);
8421 operands[0] = tmp;
8423 /* Recurse to get the constant loaded. */
8424 if (ix86_expand_int_movcc (operands) == 0)
8425 return 0; /* FAIL */
8427 /* Mask in the interesting variable. */
8428 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8429 OPTAB_WIDEN);
8430 if (out != orig_out)
8431 emit_move_insn (orig_out, out);
8433 return 1; /* DONE */
8437 * For comparison with above,
8439 * movl cf,dest
8440 * movl ct,tmp
8441 * cmpl op1,op2
8442 * cmovcc tmp,dest
8444 * Size 15.
8447 if (! nonimmediate_operand (operands[2], mode))
8448 operands[2] = force_reg (mode, operands[2]);
8449 if (! nonimmediate_operand (operands[3], mode))
8450 operands[3] = force_reg (mode, operands[3]);
8452 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8454 rtx tmp = gen_reg_rtx (mode);
8455 emit_move_insn (tmp, operands[3]);
8456 operands[3] = tmp;
8458 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8460 rtx tmp = gen_reg_rtx (mode);
8461 emit_move_insn (tmp, operands[2]);
8462 operands[2] = tmp;
8464 if (! register_operand (operands[2], VOIDmode)
8465 && ! register_operand (operands[3], VOIDmode))
8466 operands[2] = force_reg (mode, operands[2]);
8468 emit_insn (compare_seq);
8469 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8470 gen_rtx_IF_THEN_ELSE (mode,
8471 compare_op, operands[2],
8472 operands[3])));
8473 if (bypass_test)
8474 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8475 gen_rtx_IF_THEN_ELSE (mode,
8476 bypass_test,
8477 operands[3],
8478 operands[0])));
8479 if (second_test)
8480 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8481 gen_rtx_IF_THEN_ELSE (mode,
8482 second_test,
8483 operands[2],
8484 operands[0])));
8486 return 1; /* DONE */
8490 ix86_expand_fp_movcc (operands)
8491 rtx operands[];
8493 enum rtx_code code;
8494 rtx tmp;
8495 rtx compare_op, second_test, bypass_test;
8497 /* For SF/DFmode conditional moves based on comparisons
8498 in the same mode, we may want to use SSE min/max instructions. */
8499 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8500 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8501 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8502 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8503 && (!TARGET_IEEE_FP
8504 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8505 /* We may be called from the post-reload splitter. */
8506 && (!REG_P (operands[0])
8507 || SSE_REG_P (operands[0])
8508 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8510 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8511 code = GET_CODE (operands[1]);
8513 /* See if we have a (cross) match between the comparison operands
8514 and the conditional move operands. */
8515 if (rtx_equal_p (operands[2], op1))
8517 rtx tmp = op0;
8518 op0 = op1;
8519 op1 = tmp;
8520 code = reverse_condition_maybe_unordered (code);
8522 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8524 /* Check for min operation. */
8525 if (code == LT)
8527 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8528 if (memory_operand (op0, VOIDmode))
8529 op0 = force_reg (GET_MODE (operands[0]), op0);
8530 if (GET_MODE (operands[0]) == SFmode)
8531 emit_insn (gen_minsf3 (operands[0], op0, op1));
8532 else
8533 emit_insn (gen_mindf3 (operands[0], op0, op1));
8534 return 1;
8536 /* Check for max operation. */
8537 if (code == GT)
8539 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8540 if (memory_operand (op0, VOIDmode))
8541 op0 = force_reg (GET_MODE (operands[0]), op0);
8542 if (GET_MODE (operands[0]) == SFmode)
8543 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8544 else
8545 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8546 return 1;
8549 /* Arrange for the condition to be an sse_comparison_operator. In
8550 non-IEEE mode, try to canonicalize the destination operand
8551 to be first in the comparison; this helps reload avoid extra
8552 moves. */
8553 if (!sse_comparison_operator (operands[1], VOIDmode)
8554 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8556 rtx tmp = ix86_compare_op0;
8557 ix86_compare_op0 = ix86_compare_op1;
8558 ix86_compare_op1 = tmp;
8559 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8560 VOIDmode, ix86_compare_op0,
8561 ix86_compare_op1);
8563 /* Similarly, try to arrange for the result to be the first operand
8564 of the conditional move. We also don't support the NE comparison
8565 on SSE, so try to avoid it. */
8566 if ((rtx_equal_p (operands[0], operands[3])
8567 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8568 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8570 rtx tmp = operands[2];
8571 operands[2] = operands[3];
8572 operands[3] = tmp;
8573 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8574 (GET_CODE (operands[1])),
8575 VOIDmode, ix86_compare_op0,
8576 ix86_compare_op1);
8578 if (GET_MODE (operands[0]) == SFmode)
8579 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8580 operands[2], operands[3],
8581 ix86_compare_op0, ix86_compare_op1));
8582 else
8583 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8584 operands[2], operands[3],
8585 ix86_compare_op0, ix86_compare_op1));
8586 return 1;
8589 /* The floating point conditional move instructions don't directly
8590 support conditions resulting from a signed integer comparison. */
8592 code = GET_CODE (operands[1]);
8593 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8598 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8600 if (second_test != NULL || bypass_test != NULL)
8601 abort ();
8602 tmp = gen_reg_rtx (QImode);
8603 ix86_expand_setcc (code, tmp);
8604 code = NE;
8605 ix86_compare_op0 = tmp;
8606 ix86_compare_op1 = const0_rtx;
8607 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8609 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8611 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8612 emit_move_insn (tmp, operands[3]);
8613 operands[3] = tmp;
8615 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8617 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8618 emit_move_insn (tmp, operands[2]);
8619 operands[2] = tmp;
8622 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8623 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8624 compare_op,
8625 operands[2],
8626 operands[3])));
8627 if (bypass_test)
8628 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8629 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8630 bypass_test,
8631 operands[3],
8632 operands[0])));
8633 if (second_test)
8634 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8635 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8636 second_test,
8637 operands[2],
8638 operands[0])));
8640 return 1;
8643 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8644 works for floating point parameters and non-offsettable memories.
8645 For pushes, it returns just stack offsets; the values will be saved
8646 in the right order. At most three parts are generated. */
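/* For example, on ia32 a DFmode register splits into two consecutive
   SImode hard registers, an offsettable DFmode memory into two SImode
   references 4 bytes apart, and a DFmode CONST_DOUBLE into two SImode
   immediates; XFmode and TFmode values yield three parts.  */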
8648 static int
8649 ix86_split_to_parts (operand, parts, mode)
8650 rtx operand;
8651 rtx *parts;
8652 enum machine_mode mode;
8654 int size;
8656 if (!TARGET_64BIT)
8657 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8658 else
8659 size = (GET_MODE_SIZE (mode) + 4) / 8;
8661 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8662 abort ();
8663 if (size < 2 || size > 3)
8664 abort ();
8666 /* Optimize constant pool references to immediates. This is used by fp
8667 moves, which force all constants to memory to allow combining. */
8669 if (GET_CODE (operand) == MEM
8670 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8671 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8672 operand = get_pool_constant (XEXP (operand, 0));
8674 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8676 /* The only non-offsettable memories we handle are pushes. */
8677 if (! push_operand (operand, VOIDmode))
8678 abort ();
8680 operand = copy_rtx (operand);
8681 PUT_MODE (operand, Pmode);
8682 parts[0] = parts[1] = parts[2] = operand;
8684 else if (!TARGET_64BIT)
8686 if (mode == DImode)
8687 split_di (&operand, 1, &parts[0], &parts[1]);
8688 else
8690 if (REG_P (operand))
8692 if (!reload_completed)
8693 abort ();
8694 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8695 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8696 if (size == 3)
8697 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8699 else if (offsettable_memref_p (operand))
8701 operand = adjust_address (operand, SImode, 0);
8702 parts[0] = operand;
8703 parts[1] = adjust_address (operand, SImode, 4);
8704 if (size == 3)
8705 parts[2] = adjust_address (operand, SImode, 8);
8707 else if (GET_CODE (operand) == CONST_DOUBLE)
8709 REAL_VALUE_TYPE r;
8710 long l[4];
8712 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8713 switch (mode)
8715 case XFmode:
8716 case TFmode:
8717 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8718 parts[2] = gen_int_mode (l[2], SImode);
8719 break;
8720 case DFmode:
8721 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8722 break;
8723 default:
8724 abort ();
8726 parts[1] = gen_int_mode (l[1], SImode);
8727 parts[0] = gen_int_mode (l[0], SImode);
8729 else
8730 abort ();
8733 else
8735 if (mode == TImode)
8736 split_ti (&operand, 1, &parts[0], &parts[1]);
8737 if (mode == XFmode || mode == TFmode)
8739 if (REG_P (operand))
8741 if (!reload_completed)
8742 abort ();
8743 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8744 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8746 else if (offsettable_memref_p (operand))
8748 operand = adjust_address (operand, DImode, 0);
8749 parts[0] = operand;
8750 parts[1] = adjust_address (operand, SImode, 8);
8752 else if (GET_CODE (operand) == CONST_DOUBLE)
8754 REAL_VALUE_TYPE r;
8755 long l[3];
8757 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8758 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8759 /* Do not shift by 32, to avoid a warning on 32-bit systems. */
8760 if (HOST_BITS_PER_WIDE_INT >= 64)
8761 parts[0]
8762 = gen_int_mode
8763 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8764 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8765 DImode);
8766 else
8767 parts[0] = immed_double_const (l[0], l[1], DImode);
8768 parts[1] = gen_int_mode (l[2], SImode);
8770 else
8771 abort ();
8775 return size;
8778 /* Emit insns to perform a move or push of DI, DF, and XF values.
8779 Operands 2-4 receive the destination parts in the correct order;
8780 operands 5-7 receive the source parts. */
8783 void
8784 ix86_split_long_move (operands)
8785 rtx operands[];
8787 rtx part[2][3];
8788 int nparts;
8789 int push = 0;
8790 int collisions = 0;
8791 enum machine_mode mode = GET_MODE (operands[0]);
8793 /* The DFmode expanders may ask us to move a double.
8794 For a 64-bit target this is a single move. By hiding that fact
8795 here we simplify the i386.md splitters. */
8796 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8798 /* Optimize constant pool references to immediates. This is used by
8799 fp moves, which force all constants to memory to allow combining. */
8801 if (GET_CODE (operands[1]) == MEM
8802 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8803 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8804 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8805 if (push_operand (operands[0], VOIDmode))
8807 operands[0] = copy_rtx (operands[0]);
8808 PUT_MODE (operands[0], Pmode);
8810 else
8811 operands[0] = gen_lowpart (DImode, operands[0]);
8812 operands[1] = gen_lowpart (DImode, operands[1]);
8813 emit_move_insn (operands[0], operands[1]);
8814 return;
8817 /* The only non-offsettable memory we handle is push. */
8818 if (push_operand (operands[0], VOIDmode))
8819 push = 1;
8820 else if (GET_CODE (operands[0]) == MEM
8821 && ! offsettable_memref_p (operands[0]))
8822 abort ();
8824 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8825 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8827 /* When emitting a push, take care with source operands on the stack. */
8828 if (push && GET_CODE (operands[1]) == MEM
8829 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8831 if (nparts == 3)
8832 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8833 XEXP (part[1][2], 0));
8834 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8835 XEXP (part[1][1], 0));
8838 /* We need to do the copy in the right order in case an address register
8839 of the source overlaps the destination. */
8840 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8842 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8843 collisions++;
8844 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8845 collisions++;
8846 if (nparts == 3
8847 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8848 collisions++;
8850 /* Collision in the middle part can be handled by reordering. */
8851 if (collisions == 1 && nparts == 3
8852 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8854 rtx tmp;
8855 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8856 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8859 /* If there are more collisions, we can't handle it by reordering.
8860 Do an lea to the last part and use only one colliding move. */
8861 else if (collisions > 1)
8863 collisions = 1;
8864 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8865 XEXP (part[1][0], 0)));
8866 part[1][0] = change_address (part[1][0],
8867 TARGET_64BIT ? DImode : SImode,
8868 part[0][nparts - 1]);
8869 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8870 if (nparts == 3)
8871 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8875 if (push)
8877 if (!TARGET_64BIT)
8879 if (nparts == 3)
8881 /* We use only the first 12 bytes of a TFmode value, but for pushing
8882 we are required to adjust the stack as if we were pushing a real
8883 16-byte value. */
8884 if (mode == TFmode && !TARGET_64BIT)
8885 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8886 GEN_INT (-4)));
8887 emit_move_insn (part[0][2], part[1][2]);
8890 else
8892 /* In 64-bit mode we have no 32-bit push available. If this is a
8893 register, that is OK - we just use the larger counterpart. We also
8894 retype memories - these come from an attempt to avoid a REX prefix
8895 when moving the second half of a TFmode value. */
8896 if (GET_MODE (part[1][1]) == SImode)
8898 if (GET_CODE (part[1][1]) == MEM)
8899 part[1][1] = adjust_address (part[1][1], DImode, 0);
8900 else if (REG_P (part[1][1]))
8901 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8902 else
8903 abort ();
8904 if (GET_MODE (part[1][0]) == SImode)
8905 part[1][0] = part[1][1];
8908 emit_move_insn (part[0][1], part[1][1]);
8909 emit_move_insn (part[0][0], part[1][0]);
8910 return;
8913 /* Choose the correct order so we do not overwrite the source before it is copied. */
8914 if ((REG_P (part[0][0])
8915 && REG_P (part[1][1])
8916 && (REGNO (part[0][0]) == REGNO (part[1][1])
8917 || (nparts == 3
8918 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8919 || (collisions > 0
8920 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8922 if (nparts == 3)
8924 operands[2] = part[0][2];
8925 operands[3] = part[0][1];
8926 operands[4] = part[0][0];
8927 operands[5] = part[1][2];
8928 operands[6] = part[1][1];
8929 operands[7] = part[1][0];
8931 else
8933 operands[2] = part[0][1];
8934 operands[3] = part[0][0];
8935 operands[5] = part[1][1];
8936 operands[6] = part[1][0];
8939 else
8941 if (nparts == 3)
8943 operands[2] = part[0][0];
8944 operands[3] = part[0][1];
8945 operands[4] = part[0][2];
8946 operands[5] = part[1][0];
8947 operands[6] = part[1][1];
8948 operands[7] = part[1][2];
8950 else
8952 operands[2] = part[0][0];
8953 operands[3] = part[0][1];
8954 operands[5] = part[1][0];
8955 operands[6] = part[1][1];
8958 emit_move_insn (operands[2], operands[5]);
8959 emit_move_insn (operands[3], operands[6]);
8960 if (nparts == 3)
8961 emit_move_insn (operands[4], operands[7]);
8963 return;
8966 void
8967 ix86_split_ashldi (operands, scratch)
8968 rtx *operands, scratch;
8970 rtx low[2], high[2];
8971 int count;
8973 if (GET_CODE (operands[2]) == CONST_INT)
8975 split_di (operands, 2, low, high);
8976 count = INTVAL (operands[2]) & 63;
8978 if (count >= 32)
8980 emit_move_insn (high[0], low[1]);
8981 emit_move_insn (low[0], const0_rtx);
8983 if (count > 32)
8984 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8986 else
8988 if (!rtx_equal_p (operands[0], operands[1]))
8989 emit_move_insn (operands[0], operands[1]);
8990 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8991 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8994 else
8996 if (!rtx_equal_p (operands[0], operands[1]))
8997 emit_move_insn (operands[0], operands[1]);
8999 split_di (operands, 1, low, high);
9001 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9002 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9004 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9006 if (! no_new_pseudos)
9007 scratch = force_reg (SImode, const0_rtx);
9008 else
9009 emit_move_insn (scratch, const0_rtx);
9011 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9012 scratch));
9014 else
9015 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
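/* Illustration: a 64-bit shift left by a constant 5 on ia32 becomes

      shldl  $5, %eax, %edx    ; high = high << 5 | low >> 27
      sall   $5, %eax          ; low  = low << 5

   while for a constant count >= 32 the low word is simply moved into
   the high word, the low word is cleared, and any count beyond 32 is
   shifted in place.  */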
9019 void
9020 ix86_split_ashrdi (operands, scratch)
9021 rtx *operands, scratch;
9023 rtx low[2], high[2];
9024 int count;
9026 if (GET_CODE (operands[2]) == CONST_INT)
9028 split_di (operands, 2, low, high);
9029 count = INTVAL (operands[2]) & 63;
9031 if (count >= 32)
9033 emit_move_insn (low[0], high[1]);
9035 if (! reload_completed)
9036 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9037 else
9039 emit_move_insn (high[0], low[0]);
9040 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9043 if (count > 32)
9044 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9046 else
9048 if (!rtx_equal_p (operands[0], operands[1]))
9049 emit_move_insn (operands[0], operands[1]);
9050 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9051 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9054 else
9056 if (!rtx_equal_p (operands[0], operands[1]))
9057 emit_move_insn (operands[0], operands[1]);
9059 split_di (operands, 1, low, high);
9061 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9062 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9064 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9066 if (! no_new_pseudos)
9067 scratch = gen_reg_rtx (SImode);
9068 emit_move_insn (scratch, high[0]);
9069 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9070 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9071 scratch));
9073 else
9074 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9078 void
9079 ix86_split_lshrdi (operands, scratch)
9080 rtx *operands, scratch;
9082 rtx low[2], high[2];
9083 int count;
9085 if (GET_CODE (operands[2]) == CONST_INT)
9087 split_di (operands, 2, low, high);
9088 count = INTVAL (operands[2]) & 63;
9090 if (count >= 32)
9092 emit_move_insn (low[0], high[1]);
9093 emit_move_insn (high[0], const0_rtx);
9095 if (count > 32)
9096 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9098 else
9100 if (!rtx_equal_p (operands[0], operands[1]))
9101 emit_move_insn (operands[0], operands[1]);
9102 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9103 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9106 else
9108 if (!rtx_equal_p (operands[0], operands[1]))
9109 emit_move_insn (operands[0], operands[1]);
9111 split_di (operands, 1, low, high);
9113 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9114 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9116 /* Heh. By reversing the arguments, we can reuse this pattern. */
9117 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9119 if (! no_new_pseudos)
9120 scratch = force_reg (SImode, const0_rtx);
9121 else
9122 emit_move_insn (scratch, const0_rtx);
9124 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9125 scratch));
9127 else
9128 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9132 /* Helper function for the string operations below. Test whether VARIABLE
9133 is aligned to VALUE bytes. If so, jump to the returned label. */
9134 static rtx
9135 ix86_expand_aligntest (variable, value)
9136 rtx variable;
9137 int value;
9139 rtx label = gen_label_rtx ();
9140 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9141 if (GET_MODE (variable) == DImode)
9142 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9143 else
9144 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9145 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9146 1, label);
9147 return label;
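/* Callers use this in the pattern

      rtx label = ix86_expand_aligntest (destreg, 1);
      emit_insn (gen_strmovqi (destreg, srcreg));
      ix86_adjust_counter (countreg, 1);
      emit_label (label);

   so the fall-through path performs the one-byte fixup when the low
   bit of the address is set, and aligned addresses jump past it.  */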
9150 /* Decrement COUNTREG by VALUE. */
9151 static void
9152 ix86_adjust_counter (countreg, value)
9153 rtx countreg;
9154 HOST_WIDE_INT value;
9156 if (GET_MODE (countreg) == DImode)
9157 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9158 else
9159 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9162 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
9164 ix86_zero_extend_to_Pmode (exp)
9165 rtx exp;
9167 rtx r;
9168 if (GET_MODE (exp) == VOIDmode)
9169 return force_reg (Pmode, exp);
9170 if (GET_MODE (exp) == Pmode)
9171 return copy_to_mode_reg (Pmode, exp);
9172 r = gen_reg_rtx (Pmode);
9173 emit_insn (gen_zero_extendsidi2 (r, exp));
9174 return r;
9177 /* Expand string move (memcpy) operation. Use i386 string operations when
9178 profitable. expand_clrstr contains similar code. */
9180 ix86_expand_movstr (dst, src, count_exp, align_exp)
9181 rtx dst, src, count_exp, align_exp;
9183 rtx srcreg, destreg, countreg;
9184 enum machine_mode counter_mode;
9185 HOST_WIDE_INT align = 0;
9186 unsigned HOST_WIDE_INT count = 0;
9187 rtx insns;
9189 start_sequence ();
9191 if (GET_CODE (align_exp) == CONST_INT)
9192 align = INTVAL (align_exp);
9194 /* This simple hack avoids all inlining code and simplifies code below. */
9195 if (!TARGET_ALIGN_STRINGOPS)
9196 align = 64;
9198 if (GET_CODE (count_exp) == CONST_INT)
9199 count = INTVAL (count_exp);
9201 /* Figure out the proper mode for the counter. For 32 bits it is
9202 always SImode; for 64 bits use SImode when possible, otherwise DImode.
9203 COUNT is set to the number of bytes copied when known at compile time. */
9204 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9205 || x86_64_zero_extended_value (count_exp))
9206 counter_mode = SImode;
9207 else
9208 counter_mode = DImode;
9210 if (counter_mode != SImode && counter_mode != DImode)
9211 abort ();
9213 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9214 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9216 emit_insn (gen_cld ());
9218 /* When optimizing for size, emit a simple "rep ; movsb" instruction
9219 for counts not divisible by 4. */
9221 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9223 countreg = ix86_zero_extend_to_Pmode (count_exp);
9224 if (TARGET_64BIT)
9225 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9226 destreg, srcreg, countreg));
9227 else
9228 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9229 destreg, srcreg, countreg));
9232 /* For constant aligned (or small unaligned) copies use rep movsl
9233 followed by code copying the rest. For PentiumPro ensure 8-byte
9234 alignment to allow rep movsl acceleration. */
9236 else if (count != 0
9237 && (align >= 8
9238 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9239 || optimize_size || count < (unsigned int) 64))
9241 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9242 if (count & ~(size - 1))
9244 countreg = copy_to_mode_reg (counter_mode,
9245 GEN_INT ((count >> (size == 4 ? 2 : 3))
9246 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9247 countreg = ix86_zero_extend_to_Pmode (countreg);
9248 if (size == 4)
9250 if (TARGET_64BIT)
9251 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9252 destreg, srcreg, countreg));
9253 else
9254 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9255 destreg, srcreg, countreg));
9257 else
9258 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9259 destreg, srcreg, countreg));
9261 if (size == 8 && (count & 0x04))
9262 emit_insn (gen_strmovsi (destreg, srcreg));
9263 if (count & 0x02)
9264 emit_insn (gen_strmovhi (destreg, srcreg));
9265 if (count & 0x01)
9266 emit_insn (gen_strmovqi (destreg, srcreg));
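/* E.g. on ia32 a memcpy of a known 11 bytes with 4-byte alignment
   becomes "movl $2, %ecx; rep movsl" for the first 8 bytes, followed
   by one strmovhi and one strmovqi for the trailing 3 bytes.  */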
9268 /* The generic code, based on the glibc implementation:
9269 - align the destination to 4 bytes (8-byte alignment is used for
9270 PentiumPro, allowing accelerated copying there)
9271 - copy the data using rep movsl
9272 - copy the rest. */
9273 else
9275 rtx countreg2;
9276 rtx label = NULL;
9277 int desired_alignment = (TARGET_PENTIUMPRO
9278 && (count == 0 || count >= (unsigned int) 260)
9279 ? 8 : UNITS_PER_WORD);
9281 /* In case we don't know anything about the alignment, default to
9282 the library version, since it is usually equally fast and results
9283 in shorter code. */
9284 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9286 end_sequence ();
9287 return 0;
9290 if (TARGET_SINGLE_STRINGOP)
9291 emit_insn (gen_cld ());
9293 countreg2 = gen_reg_rtx (Pmode);
9294 countreg = copy_to_mode_reg (counter_mode, count_exp);
9296 /* We don't use loops to align the destination or to copy parts smaller
9297 than 4 bytes, because gcc is able to optimize such code better (when
9298 the destination or the count really is aligned, gcc is often
9299 able to predict the branches) and it is also friendlier to
9300 hardware branch prediction.
9302 Using loops is beneficial for the generic case, because we can
9303 handle small counts with the loops. Many CPUs (such as the Athlon)
9304 have large REP prefix setup costs.
9306 This is quite costly. Maybe we can revisit this decision later or
9307 add some customizability to this code. */
9309 if (count == 0 && align < desired_alignment)
9311 label = gen_label_rtx ();
9312 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9313 LEU, 0, counter_mode, 1, label);
9315 if (align <= 1)
9317 rtx label = ix86_expand_aligntest (destreg, 1);
9318 emit_insn (gen_strmovqi (destreg, srcreg));
9319 ix86_adjust_counter (countreg, 1);
9320 emit_label (label);
9321 LABEL_NUSES (label) = 1;
9323 if (align <= 2)
9325 rtx label = ix86_expand_aligntest (destreg, 2);
9326 emit_insn (gen_strmovhi (destreg, srcreg));
9327 ix86_adjust_counter (countreg, 2);
9328 emit_label (label);
9329 LABEL_NUSES (label) = 1;
9331 if (align <= 4 && desired_alignment > 4)
9333 rtx label = ix86_expand_aligntest (destreg, 4);
9334 emit_insn (gen_strmovsi (destreg, srcreg));
9335 ix86_adjust_counter (countreg, 4);
9336 emit_label (label);
9337 LABEL_NUSES (label) = 1;
9340 if (label && desired_alignment > 4 && !TARGET_64BIT)
9342 emit_label (label);
9343 LABEL_NUSES (label) = 1;
9344 label = NULL_RTX;
9346 if (!TARGET_SINGLE_STRINGOP)
9347 emit_insn (gen_cld ());
9348 if (TARGET_64BIT)
9350 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9351 GEN_INT (3)));
9352 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9353 destreg, srcreg, countreg2));
9355 else
9357 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9358 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9359 destreg, srcreg, countreg2));
9362 if (label)
9364 emit_label (label);
9365 LABEL_NUSES (label) = 1;
9367 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9368 emit_insn (gen_strmovsi (destreg, srcreg));
9369 if ((align <= 4 || count == 0) && TARGET_64BIT)
9371 rtx label = ix86_expand_aligntest (countreg, 4);
9372 emit_insn (gen_strmovsi (destreg, srcreg));
9373 emit_label (label);
9374 LABEL_NUSES (label) = 1;
9376 if (align > 2 && count != 0 && (count & 2))
9377 emit_insn (gen_strmovhi (destreg, srcreg));
9378 if (align <= 2 || count == 0)
9380 rtx label = ix86_expand_aligntest (countreg, 2);
9381 emit_insn (gen_strmovhi (destreg, srcreg));
9382 emit_label (label);
9383 LABEL_NUSES (label) = 1;
9385 if (align > 1 && count != 0 && (count & 1))
9386 emit_insn (gen_strmovqi (destreg, srcreg));
9387 if (align <= 1 || count == 0)
9389 rtx label = ix86_expand_aligntest (countreg, 1);
9390 emit_insn (gen_strmovqi (destreg, srcreg));
9391 emit_label (label);
9392 LABEL_NUSES (label) = 1;
9396 insns = get_insns ();
9397 end_sequence ();
9399 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9400 emit_insns (insns);
9401 return 1;
9404 /* Expand string clear operation (bzero). Use i386 string operations when
9405 profitable. expand_movstr contains similar code. */
9407 ix86_expand_clrstr (src, count_exp, align_exp)
9408 rtx src, count_exp, align_exp;
9410 rtx destreg, zeroreg, countreg;
9411 enum machine_mode counter_mode;
9412 HOST_WIDE_INT align = 0;
9413 unsigned HOST_WIDE_INT count = 0;
9415 if (GET_CODE (align_exp) == CONST_INT)
9416 align = INTVAL (align_exp);
9418 /* This simple hack avoids all inlining code and simplifies code below. */
9419 if (!TARGET_ALIGN_STRINGOPS)
9420 align = 32;
9422 if (GET_CODE (count_exp) == CONST_INT)
9423 count = INTVAL (count_exp);
9424 /* Figure out the proper mode for the counter. For 32 bits it is
9425 always SImode; for 64 bits use SImode when possible, otherwise DImode.
9426 COUNT is set to the number of bytes cleared when known at compile time. */
9427 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9428 || x86_64_zero_extended_value (count_exp))
9429 counter_mode = SImode;
9430 else
9431 counter_mode = DImode;
9433 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9435 emit_insn (gen_cld ());
9437 /* When optimizing for size, emit a simple "rep ; stosb" instruction
9438 for counts not divisible by 4. */
9440 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9442 countreg = ix86_zero_extend_to_Pmode (count_exp);
9443 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9444 if (TARGET_64BIT)
9445 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9446 destreg, countreg));
9447 else
9448 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9449 destreg, countreg));
9451 else if (count != 0
9452 && (align >= 8
9453 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9454 || optimize_size || count < (unsigned int) 64))
9456 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9457 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9458 if (count & ~(size - 1))
9460 countreg = copy_to_mode_reg (counter_mode,
9461 GEN_INT ((count >> (size == 4 ? 2 : 3))
9462 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9463 countreg = ix86_zero_extend_to_Pmode (countreg);
9464 if (size == 4)
9466 if (TARGET_64BIT)
9467 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9468 destreg, countreg));
9469 else
9470 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9471 destreg, countreg));
9473 else
9474 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9475 destreg, countreg));
9477 if (size == 8 && (count & 0x04))
9478 emit_insn (gen_strsetsi (destreg,
9479 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9480 if (count & 0x02)
9481 emit_insn (gen_strsethi (destreg,
9482 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9483 if (count & 0x01)
9484 emit_insn (gen_strsetqi (destreg,
9485 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9487 else
9489 rtx countreg2;
9490 rtx label = NULL;
9491 /* Compute desired alignment of the string operation. */
9492 int desired_alignment = (TARGET_PENTIUMPRO
9493 && (count == 0 || count >= (unsigned int) 260)
9494 ? 8 : UNITS_PER_WORD);
9496 /* In case we don't know anything about the alignment, default to
9497 the library version, since it is usually equally fast and results
9498 in shorter code. */
9499 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9500 return 0;
9502 if (TARGET_SINGLE_STRINGOP)
9503 emit_insn (gen_cld ());
9505 countreg2 = gen_reg_rtx (Pmode);
9506 countreg = copy_to_mode_reg (counter_mode, count_exp);
9507 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9509 if (count == 0 && align < desired_alignment)
9511 label = gen_label_rtx ();
9512 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9513 LEU, 0, counter_mode, 1, label);
9515 if (align <= 1)
9517 rtx label = ix86_expand_aligntest (destreg, 1);
9518 emit_insn (gen_strsetqi (destreg,
9519 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9520 ix86_adjust_counter (countreg, 1);
9521 emit_label (label);
9522 LABEL_NUSES (label) = 1;
9524 if (align <= 2)
9526 rtx label = ix86_expand_aligntest (destreg, 2);
9527 emit_insn (gen_strsethi (destreg,
9528 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9529 ix86_adjust_counter (countreg, 2);
9530 emit_label (label);
9531 LABEL_NUSES (label) = 1;
9533 if (align <= 4 && desired_alignment > 4)
9535 rtx label = ix86_expand_aligntest (destreg, 4);
9536 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9537 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9538 : zeroreg)));
9539 ix86_adjust_counter (countreg, 4);
9540 emit_label (label);
9541 LABEL_NUSES (label) = 1;
9544 if (label && desired_alignment > 4 && !TARGET_64BIT)
9546 emit_label (label);
9547 LABEL_NUSES (label) = 1;
9548 label = NULL_RTX;
9551 if (!TARGET_SINGLE_STRINGOP)
9552 emit_insn (gen_cld ());
9553 if (TARGET_64BIT)
9555 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9556 GEN_INT (3)));
9557 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9558 destreg, countreg2));
9560 else
9562 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9563 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9564 destreg, countreg2));
9566 if (label)
9568 emit_label (label);
9569 LABEL_NUSES (label) = 1;
9572 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9573 emit_insn (gen_strsetsi (destreg,
9574 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9575 if (TARGET_64BIT && (align <= 4 || count == 0))
9577 rtx label = ix86_expand_aligntest (countreg, 4);
9578 emit_insn (gen_strsetsi (destreg,
9579 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9580 emit_label (label);
9581 LABEL_NUSES (label) = 1;
9583 if (align > 2 && count != 0 && (count & 2))
9584 emit_insn (gen_strsethi (destreg,
9585 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9586 if (align <= 2 || count == 0)
9588 rtx label = ix86_expand_aligntest (countreg, 2);
9589 emit_insn (gen_strsethi (destreg,
9590 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9591 emit_label (label);
9592 LABEL_NUSES (label) = 1;
9594 if (align > 1 && count != 0 && (count & 1))
9595 emit_insn (gen_strsetqi (destreg,
9596 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9597 if (align <= 1 || count == 0)
9599 rtx label = ix86_expand_aligntest (countreg, 1);
9600 emit_insn (gen_strsetqi (destreg,
9601 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9602 emit_label (label);
9603 LABEL_NUSES (label) = 1;
9606 return 1;
9608 /* Expand strlen. */
9610 ix86_expand_strlen (out, src, eoschar, align)
9611 rtx out, src, eoschar, align;
9613 rtx addr, scratch1, scratch2, scratch3, scratch4;
9615 /* The generic case of the strlen expander is long. Avoid expanding
9616 it unless TARGET_INLINE_ALL_STRINGOPS. */
9618 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9619 && !TARGET_INLINE_ALL_STRINGOPS
9620 && !optimize_size
9621 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9622 return 0;
9624 addr = force_reg (Pmode, XEXP (src, 0));
9625 scratch1 = gen_reg_rtx (Pmode);
9627 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9628 && !optimize_size)
9630 /* Well, it seems that some optimizer does not combine a call like
9631 foo(strlen(bar), strlen(bar));
9632 when the move and the subtraction are done here. It calculates
9633 the length just once when these instructions are done inside
9634 output_strlen_unroll(). But since &bar[strlen(bar)] is
9635 often used, and this uses one fewer register for the lifetime of
9636 output_strlen_unroll(), this is better. */
9638 emit_move_insn (out, addr);
9640 ix86_expand_strlensi_unroll_1 (out, align);
9642 /* strlensi_unroll_1 returns the address of the zero at the end of
9643 the string, like memchr(), so compute the length by subtracting
9644 the start address. */
9645 if (TARGET_64BIT)
9646 emit_insn (gen_subdi3 (out, out, addr));
9647 else
9648 emit_insn (gen_subsi3 (out, out, addr));
9650 else
9652 scratch2 = gen_reg_rtx (Pmode);
9653 scratch3 = gen_reg_rtx (Pmode);
9654 scratch4 = force_reg (Pmode, constm1_rtx);
9656 emit_move_insn (scratch3, addr);
9657 eoschar = force_reg (QImode, eoschar);
9659 emit_insn (gen_cld ());
9660 if (TARGET_64BIT)
9662 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9663 align, scratch4, scratch3));
9664 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9665 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9667 else
9669 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9670 align, scratch4, scratch3));
9671 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9672 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9675 return 1;
9678 /* Expand the appropriate insns for doing strlen if not just doing
9679 repnz; scasb
9681 out = result, initialized with the start address
9682 align_rtx = alignment of the address.
9683 scratch = scratch register, initialized with the start address when
9684 not aligned, otherwise undefined
9686 This is just the body. It needs the initializations mentioned above and
9687 some address computing at the end. These things are done in i386.md. */
9689 static void
9690 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9691 rtx out, align_rtx;
9693 int align;
9694 rtx tmp;
9695 rtx align_2_label = NULL_RTX;
9696 rtx align_3_label = NULL_RTX;
9697 rtx align_4_label = gen_label_rtx ();
9698 rtx end_0_label = gen_label_rtx ();
9699 rtx mem;
9700 rtx tmpreg = gen_reg_rtx (SImode);
9701 rtx scratch = gen_reg_rtx (SImode);
9703 align = 0;
9704 if (GET_CODE (align_rtx) == CONST_INT)
9705 align = INTVAL (align_rtx);
9707 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9709 /* Is there a known alignment and is it less than 4? */
9710 if (align < 4)
9712 rtx scratch1 = gen_reg_rtx (Pmode);
9713 emit_move_insn (scratch1, out);
9714 /* Is there a known alignment and is it not 2? */
9715 if (align != 2)
9717 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9718 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9720 /* Leave just the 3 lower bits. */
9721 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9722 NULL_RTX, 0, OPTAB_WIDEN);
9724 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9725 Pmode, 1, align_4_label);
9726 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9727 Pmode, 1, align_2_label);
9728 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9729 Pmode, 1, align_3_label);
9731 else
9733 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9734 check whether it is aligned to 4 bytes. */
9736 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9737 NULL_RTX, 0, OPTAB_WIDEN);
9739 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9740 Pmode, 1, align_4_label);
9743 mem = gen_rtx_MEM (QImode, out);
9745 /* Now compare the bytes. */
9747 /* Compare the first few unaligned bytes on a byte-by-byte basis. */
9748 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9749 QImode, 1, end_0_label);
9751 /* Increment the address. */
9752 if (TARGET_64BIT)
9753 emit_insn (gen_adddi3 (out, out, const1_rtx));
9754 else
9755 emit_insn (gen_addsi3 (out, out, const1_rtx));
9757 /* Not needed with an alignment of 2. */
9758 if (align != 2)
9760 emit_label (align_2_label);
9762 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9763 end_0_label);
9765 if (TARGET_64BIT)
9766 emit_insn (gen_adddi3 (out, out, const1_rtx));
9767 else
9768 emit_insn (gen_addsi3 (out, out, const1_rtx));
9770 emit_label (align_3_label);
9773 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9774 end_0_label);
9776 if (TARGET_64BIT)
9777 emit_insn (gen_adddi3 (out, out, const1_rtx));
9778 else
9779 emit_insn (gen_addsi3 (out, out, const1_rtx));
9782 /* Generate the loop to check 4 bytes at a time. It is not a good
9783 idea to align this loop: it only makes the program bigger and does
9784 not speed it up. */
9785 emit_label (align_4_label);
9787 mem = gen_rtx_MEM (SImode, out);
9788 emit_move_insn (scratch, mem);
9789 if (TARGET_64BIT)
9790 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9791 else
9792 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9794 /* This formula yields a nonzero result iff one of the bytes is zero.
9795 This saves three branches inside the loop and many cycles. */
9797 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9798 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9799 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9800 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9801 gen_int_mode (0x80808080, SImode)));
9802 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9803 align_4_label);
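/* Worked example: for scratch = 0x12005634 (one byte is 0x00),
   scratch - 0x01010101 == 0x10ff5533 and ~scratch == 0xedffa9cb;
   their AND is 0x00ff0103, and masking with 0x80808080 leaves
   0x00800000, where the surviving bit marks the zero byte.  When
   no byte is zero, the final mask always yields 0.  */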
9805 if (TARGET_CMOVE)
9807 rtx reg = gen_reg_rtx (SImode);
9808 rtx reg2 = gen_reg_rtx (Pmode);
9809 emit_move_insn (reg, tmpreg);
9810 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9812 /* If zero is not in the first two bytes, move two bytes forward. */
9813 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9814 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9815 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9816 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9817 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9818 reg,
9819 tmpreg)));
9820 /* Emit the lea manually to avoid clobbering the flags. */
9821 emit_insn (gen_rtx_SET (SImode, reg2,
9822 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9824 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9825 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9826 emit_insn (gen_rtx_SET (VOIDmode, out,
9827 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9828 reg2,
9829 out)));
9832 else
9834 rtx end_2_label = gen_label_rtx ();
9835 /* Is zero in the first two bytes? */
9837 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9838 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9839 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9840 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9841 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9842 pc_rtx);
9843 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9844 JUMP_LABEL (tmp) = end_2_label;
9846 /* Not in the first two. Move two bytes forward. */
9847 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9848 if (TARGET_64BIT)
9849 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9850 else
9851 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9853 emit_label (end_2_label);
9857 /* Avoid a branch when fixing up the byte position. */
9858 tmpreg = gen_lowpart (QImode, tmpreg);
9859 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9860 if (TARGET_64BIT)
9861 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9862 else
9863 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
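/* OUT now points 4 bytes (or, after the two-byte adjustment above,
   6 bytes) past the start of the scanned word.  Doubling the marker
   byte moves its top bit into the carry flag, so the subtract-with-
   borrow of 3 removes either 3 or 4 from OUT, landing it exactly on
   the zero byte without a branch.  */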
9865 emit_label (end_0_label);
9868 /* Clear stack slot assignments remembered from previous functions.
9869 This is called from INIT_EXPANDERS once before RTL is emitted for each
9870 function. */
9872 static void
9873 ix86_init_machine_status (p)
9874 struct function *p;
9876 p->machine = (struct machine_function *)
9877 xcalloc (1, sizeof (struct machine_function));
9880 /* Mark machine specific bits of P for GC. */
9881 static void
9882 ix86_mark_machine_status (p)
9883 struct function *p;
9885 struct machine_function *machine = p->machine;
9886 enum machine_mode mode;
9887 int n;
9889 if (! machine)
9890 return;
9892 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9893 mode = (enum machine_mode) ((int) mode + 1))
9894 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9895 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9898 static void
9899 ix86_free_machine_status (p)
9900 struct function *p;
9902 free (p->machine);
9903 p->machine = NULL;
9906 /* Return a MEM corresponding to a stack slot with mode MODE.
9907 Allocate a new slot if necessary.
9909 The RTL for a function can have several slots available: N is
9910 which slot to use. */
9913 assign_386_stack_local (mode, n)
9914 enum machine_mode mode;
9915 int n;
9917 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9918 abort ();
9920 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9921 ix86_stack_locals[(int) mode][n]
9922 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9924 return ix86_stack_locals[(int) mode][n];
9927 /* Calculate the length of the memory address in the instruction
9928 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9930 static int
9931 memory_address_length (addr)
9932 rtx addr;
9934 struct ix86_address parts;
9935 rtx base, index, disp;
9936 int len;
9938 if (GET_CODE (addr) == PRE_DEC
9939 || GET_CODE (addr) == POST_INC
9940 || GET_CODE (addr) == PRE_MODIFY
9941 || GET_CODE (addr) == POST_MODIFY)
9942 return 0;
9944 if (! ix86_decompose_address (addr, &parts))
9945 abort ();
9947 base = parts.base;
9948 index = parts.index;
9949 disp = parts.disp;
9950 len = 0;
9952 /* Register Indirect. */
9953 if (base && !index && !disp)
9955 /* Special cases: ebp and esp need the two-byte modrm form. */
9956 if (addr == stack_pointer_rtx
9957 || addr == arg_pointer_rtx
9958 || addr == frame_pointer_rtx
9959 || addr == hard_frame_pointer_rtx)
9960 len = 1;
9963 /* Direct Addressing. */
9964 else if (disp && !base && !index)
9965 len = 4;
9967 else
9969 /* Find the length of the displacement constant. */
9970 if (disp)
9972 if (GET_CODE (disp) == CONST_INT
9973 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9974 len = 1;
9975 else
9976 len = 4;
9979 /* An index requires the two-byte modrm form. */
9980 if (index)
9981 len += 1;
9984 return len;
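/* For example, (%eax) adds no bytes, 8(%ebp) needs a one-byte
   displacement, a bare symbol address needs a four-byte displacement,
   and foo(,%eax,4) needs four displacement bytes plus the SIB byte
   counted for the index.  */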
9987 /* Compute the default value for the "length_immediate" attribute.
9988 When SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
9990 ix86_attr_length_immediate_default (insn, shortform)
9991 rtx insn;
9992 int shortform;
9994 int len = 0;
9995 int i;
9996 extract_insn_cached (insn);
9997 for (i = recog_data.n_operands - 1; i >= 0; --i)
9998 if (CONSTANT_P (recog_data.operand[i]))
10000 if (len)
10001 abort ();
10002 if (shortform
10003 && GET_CODE (recog_data.operand[i]) == CONST_INT
10004 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10005 len = 1;
10006 else
10008 switch (get_attr_mode (insn))
10010 case MODE_QI:
10011 len += 1;
10012 break;
10013 case MODE_HI:
10014 len += 2;
10015 break;
10016 case MODE_SI:
10017 len += 4;
10018 break;
10019 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10020 case MODE_DI:
10021 len += 4;
10022 break;
10023 default:
10024 fatal_insn ("unknown insn mode", insn);
10028 return len;
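/* E.g. "addl $100000, %eax" contributes 4 bytes here, while with
   SHORTFORM set "addl $4, %eax" satisfies the 'K' constraint (a
   signed 8-bit constant) and contributes just 1 byte.  */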
10030 /* Compute the default value for the "length_address" attribute. */
10032 ix86_attr_length_address_default (insn)
10033 rtx insn;
10035 int i;
10036 extract_insn_cached (insn);
10037 for (i = recog_data.n_operands - 1; i >= 0; --i)
10038 if (GET_CODE (recog_data.operand[i]) == MEM)
10040 return memory_address_length (XEXP (recog_data.operand[i], 0));
10041 break;
10043 return 0;
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and nothing else set by DEP_INSN.  */
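/* This catches, for example, a "cmp" immediately followed by the "jcc"
   or "setcc" consuming its flags result; ix86_adjust_cost uses it to
   give such pairs a cost of zero on the Pentium, where they pair.  */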
static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */
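/* Example of the interlock being modelled: on the Pentium, a load such
   as "movl (%eax), %ebx" stalls for an extra cycle when %eax was
   computed by the immediately preceding insn (the classic AGI stall).  */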
static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
        ;
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      else
        abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
static int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;
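  /* Note: true (data) dependencies carry a REG_NOTE_KIND of zero; anti
     and output dependencies are tagged REG_DEP_ANTI / REG_DEP_OUTPUT,
     and since no value is transmitted they add no latency here.  */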
  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
          && dep_insn_type != TYPE_FMOV
          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
        cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         in case the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
        cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         in case the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;
    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
        {
          if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
            cost += 2;
          else
            cost += 3;
        }
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         in case the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 0;
          else if (cost >= 3)
            cost -= 3;
          else
            cost = 0;
        }

    default:
      break;
    }

  return cost;
}
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}
static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
               INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
        insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */
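
  /* The scheme below models the PPro/PII "4-1-1" decoder template:
     decoder 0 can take an insn of up to four uops, decoders 1 and 2
     take only single-uop insns, and anything longer than four uops
     (PPRO_UOPS_MANY) is microcoded and issues alone.  */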
  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
        {
          decode[0] = *insnp;
          goto ppro_done;
        }

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
        {
          if (insnp == ready)
            break;
          cur_uops = ix86_safe_ppro_uops (*--insnp);
        }

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
        ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
        if (ready >= e_ready)
          goto ppro_done;

        insnp = e_ready;
        cur_uops = ix86_safe_ppro_uops (*insnp);
        while (cur_uops != PPRO_UOPS_ONE)
          {
            if (insnp == ready)
              break;
            cur_uops = ix86_safe_ppro_uops (*--insnp);
          }

        /* Found one.  Move it to the head of the queue and issue it.  */
        if (cur_uops == PPRO_UOPS_ONE)
          {
            ix86_reorder_insn (insnp, e_ready);
            decode[i] = *e_ready--;
            issued_this_cycle++;
            continue;
          }

        /* ??? Didn't find one.  Ideally, here we would do a lazy split
           of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
        enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

        if (uops == PPRO_UOPS_MANY)
          {
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = NULL;
          }
        else if (uops == PPRO_UOPS_FEW)
          {
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
          }
        else
          {
            for (i = 0; i < 3; ++i)
              if (ix86_sched_data.ppro.decode[i] == NULL)
                {
                  ix86_sched_data.ppro.decode[i] = insn;
                  break;
                }
            if (i == 3)
              abort ();
            if (i == 2)
              {
                if (sched_verbose)
                  ix86_dump_ppro_packet (dump);
                ix86_sched_data.ppro.decode[0] = NULL;
                ix86_sched_data.ppro.decode[1] = NULL;
                ix86_sched_data.ppro.decode[2] = NULL;
              }
          }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
static int
ia32_use_dfa_pipeline_interface ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 1;
  return 0;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
                                 dstreg, srcreg);
}
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
        ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
                                   dstreg, srcreg);
      else if (*format_ptr == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
                                     dstreg, srcreg);
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
           && align < 256)
    return 256;

  return align;
}
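
/* For instance, a DFmode (double) constant would ordinarily get only
   32-bit alignment in 32-bit mode; raising it to 64 bits avoids the
   penalty for misaligned 8-byte FP loads.  */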
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
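/* On ia32 the 10-byte template emitted below is

       b9 <cxt:4>          movl $CXT, %ecx
       e9 <disp:4>         jmp  FNADDR

   where the jmp displacement is taken relative to the end of the
   trampoline (tramp + 10).  */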
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does
         not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode,
                                       plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode,
                                       plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
        abort ();
    }
}
#define def_builtin(MASK, NAME, TYPE, CODE)                       \
do {                                                              \
  if ((MASK) & target_flags)                                      \
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
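
/* Illustrative use (the tables below drive equivalent calls):

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   i.e. the builtin is only registered when its ISA mask is enabled in
   target_flags.  */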
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
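
/* In the tables below, the COMPARISON and FLAG fields describe how the
   expander should emit the insn: "__builtin_ia32_comigt", for example,
   is recorded as LT with FLAG set, meaning the operands are swapped so
   that a greater-than test reuses the less-than pattern.  */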
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
};
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};
void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
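
  /* The type nodes built below follow a simple naming convention:
     "v4sf_ftype_v4sf_v2si", for instance, is the type of a function
     returning V4SF and taking V4SF and V2SI arguments.  Every argument
     list is a tree_cons chain terminated by void_list_node (endlink).  */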
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V4SF_type_node,
                                                 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V4SF_type_node,
                                                 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, unsigned_type_node,
                                      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
                                  tree_cons (NULL_TREE, V8QI_type_node,
                                             tree_cons (NULL_TREE,
                                                        pchar_type_node,
                                                        endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      endlink));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, pv2si_type_node,
                                                 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pv2si_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));
  tree void_ftype_pv2di_v2di
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pv2di_type_node,
                                      tree_cons (NULL_TREE,
                                                 V2DI_type_node,
                                                 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V2SF_type_node,
                                                 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V2SF_type_node,
                                                 endlink)));
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree int_ftype_v2df_v2df
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node, endlink)));

  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
                           tree_cons (NULL_TREE, intTI_type_node,
                                      tree_cons (NULL_TREE, intTI_type_node,
                                                 endlink)));
  tree void_ftype_pvoid
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, ptr_type_node, endlink));
  tree v2di_ftype_di
    = build_function_type (V2DI_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      endlink));
  tree v4sf_ftype_v4si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SI_type_node, endlink));
  tree v4si_ftype_v4sf
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree v2df_ftype_v4si
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V4SI_type_node, endlink));
  tree v4si_ftype_v2df
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2si_ftype_v2df
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v4sf_ftype_v2df
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2df_ftype_v2si
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree v2df_ftype_v4sf
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree int_ftype_v2df
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2df_ftype_v2df_int
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v2df
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node,
                                                 endlink)));
  tree v2df_ftype_v2df_v4sf
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree v2df_ftype_v2df_v2df_int
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v2df_ftype_v2df_pv2si
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, pv2si_type_node,
                                                 endlink)));
  tree void_ftype_pv2si_v2df
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pv2si_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node,
                                                 endlink)));
  tree void_ftype_pdouble_v2df
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pdouble_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node,
                                                 endlink)));
  tree void_ftype_pint_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pint_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
                                    tree_cons (NULL_TREE, V16QI_type_node,
                                               tree_cons (NULL_TREE,
                                                          pchar_type_node,
                                                          endlink)));
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type (void_type_node, maskmovdqu_args);
  tree v2df_ftype_pdouble
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, pdouble_type_node,
                                      endlink));
  tree v2df_ftype_v2df_v2df
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node,
                                                 endlink)));
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type (V16QI_type_node,
                           tree_cons (NULL_TREE, V16QI_type_node,
                                      tree_cons (NULL_TREE, V16QI_type_node,
                                                 endlink)));
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type (V8HI_type_node,
                           tree_cons (NULL_TREE, V8HI_type_node,
                                      tree_cons (NULL_TREE, V8HI_type_node,
                                                 endlink)));
  tree v4si_ftype_v4si_v4si
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V4SI_type_node,
                                      tree_cons (NULL_TREE, V4SI_type_node,
                                                 endlink)));
  tree v2di_ftype_v2di_v2di
    = build_function_type (V2DI_type_node,
                           tree_cons (NULL_TREE, V2DI_type_node,
                                      tree_cons (NULL_TREE, V2DI_type_node,
                                                 endlink)));
  tree v2di_ftype_v2df_v2df
    = build_function_type (V2DI_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      tree_cons (NULL_TREE, V2DF_type_node,
                                                 endlink)));
  tree v2df_ftype_v2df
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, V2DF_type_node,
                                      endlink));
  tree v2df_ftype_double
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, double_type_node,
                                      endlink));
  tree v2df_ftype_double_double
    = build_function_type (V2DF_type_node,
                           tree_cons (NULL_TREE, double_type_node,
                                      tree_cons (NULL_TREE, double_type_node,
                                                 endlink)));
  tree int_ftype_v8hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v8hi_ftype_v8hi_int_int
    = build_function_type (V8HI_type_node,
                           tree_cons (NULL_TREE, V8HI_type_node,
11543 tree_cons (NULL_TREE, integer_type_node,
11544 tree_cons (NULL_TREE,
11545 integer_type_node,
11546 endlink))));
11547 tree v2di_ftype_v2di_int
11548 = build_function_type (V2DI_type_node,
11549 tree_cons (NULL_TREE, V2DI_type_node,
11550 tree_cons (NULL_TREE, integer_type_node,
11551 endlink)));
11552 tree v4si_ftype_v4si_int
11553 = build_function_type (V4SI_type_node,
11554 tree_cons (NULL_TREE, V4SI_type_node,
11555 tree_cons (NULL_TREE, integer_type_node,
11556 endlink)));
11557 tree v8hi_ftype_v8hi_int
11558 = build_function_type (V8HI_type_node,
11559 tree_cons (NULL_TREE, V8HI_type_node,
11560 tree_cons (NULL_TREE, integer_type_node,
11561 endlink)));
11562 tree v8hi_ftype_v8hi_v2di
11563 = build_function_type (V8HI_type_node,
11564 tree_cons (NULL_TREE, V8HI_type_node,
11565 tree_cons (NULL_TREE, V2DI_type_node,
11566 endlink)));
11567 tree v4si_ftype_v4si_v2di
11568 = build_function_type (V4SI_type_node,
11569 tree_cons (NULL_TREE, V4SI_type_node,
11570 tree_cons (NULL_TREE, V2DI_type_node,
11571 endlink)));
11572 tree v4si_ftype_v8hi_v8hi
11573 = build_function_type (V4SI_type_node,
11574 tree_cons (NULL_TREE, V8HI_type_node,
11575 tree_cons (NULL_TREE, V8HI_type_node,
11576 endlink)));
11577 tree di_ftype_v8qi_v8qi
11578 = build_function_type (long_long_unsigned_type_node,
11579 tree_cons (NULL_TREE, V8QI_type_node,
11580 tree_cons (NULL_TREE, V8QI_type_node,
11581 endlink)));
11582 tree v2di_ftype_v16qi_v16qi
11583 = build_function_type (V2DI_type_node,
11584 tree_cons (NULL_TREE, V16QI_type_node,
11585 tree_cons (NULL_TREE, V16QI_type_node,
11586 endlink)));
11587 tree int_ftype_v16qi
11588 = build_function_type (integer_type_node,
11589 tree_cons (NULL_TREE, V16QI_type_node, endlink));
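  /* A sketch of what these builders mean at the C level (illustrative
     only, not compiled here): v2df_ftype_v2df_v2df, for instance, is
     the type of

         typedef double __v2df __attribute__ ((vector_size (16)));
         __v2df f (__v2df, __v2df);

     with the argument chain terminated by endlink so the prototype is
     not varargs.  */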
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

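  /* As an illustration of the loop above (entry shown schematically,
     not quoted from the table): a bdesc_2arg record along the lines of

         { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw",
           IX86_BUILTIN_PADDW, 0, 0 }

     has operand 1 in V4HImode, so it is registered with the
     v4hi_ftype_v4hi_v4hi signature.  */
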
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}

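/* User-level view of the registrations above (illustrative only):
   compiling with -msse makes calls such as

     typedef float __v4sf __attribute__ ((vector_size (16)));
     __v4sf add (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }

   available; ix86_expand_builtin below maps them onto the named insn
   patterns via the bdesc tables.  */
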
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

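/* For example, if an error in the user's source left one argument of
   __builtin_ia32_addps expanding to const0_rtx, the routine above
   substitutes a freshly cleared V4SF register so the insn predicates
   still accept the operand.  */
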
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

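/* Sketch of the result for CODE_FOR_addv4sf3 (illustrative): after the
   predicate checks force both inputs into acceptable operands, the
   emitted insn is roughly

     (set (reg:V4SF target)
          (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   and TARGET is returned as the value of the builtin call.  */
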
/* In type_for_mode we restrict the ability to create TImode types
   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
   to have a V4SFmode signature.  Convert them in-place to TImode.  */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (TImode, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return gen_lowpart (V4SFmode, target);
}

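/* Illustration: __builtin_ia32_andps is declared with a V4SFmode
   signature above, so

     typedef float __v4sf __attribute__ ((vector_size (16)));
     __v4sf mask (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_andps (a, b);
     }

   goes through this routine, which operates on the TImode lowparts and
   hands the result back as a V4SFmode lowpart.  */
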
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

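/* Illustration: a store builtin such as

     typedef float __v4sf __attribute__ ((vector_size (16)));
     void save (float *p, __v4sf v)
     {
       __builtin_ia32_storeaps (p, v);
     }

   arrives here with ARG0 the pointer and ARG1 the value; the pointer is
   wrapped as (mem:V4SF (reg)) before the movaps pattern is emitted.  */
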
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

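/* Illustration: for __builtin_ia32_loadups the caller passes DO_LOAD
   as 1, so

     typedef float __v4sf __attribute__ ((vector_size (16)));
     __v4sf load (float *p)
     {
       return __builtin_ia32_loadups (p);
     }

   expands with op0 rewritten into (mem:V4SF (reg)) rather than being
   checked against a register predicate.  */
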
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

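/* Illustration: the vm* patterns merge a scalar result into the upper
   elements of their first input, so the single-argument builtin

     typedef float __v4sf __attribute__ ((vector_size (16)));
     __v4sf root (__v4sf a)
     {
       return __builtin_ia32_sqrtss (a);
     }

   is expanded with op0 duplicated into op1, as if it were
   sqrtss (a, a).  */
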
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

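/* Illustration: the hardware has cmpltps but no cmpgtps, so the table
   entry for __builtin_ia32_cmpgtps (elsewhere in this file) uses the LT
   comparison with d->flag set.  User code such as

     typedef float __v4sf __attribute__ ((vector_size (16)));
     typedef int __v4si __attribute__ ((vector_size (16)));
     __v4si gt_mask (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_cmpgtps (a, b);
     }

   is therefore expanded here as cmpltps with the operands exchanged.  */
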
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

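/* Illustration: a comi builtin returns a flags-derived int, e.g.

     typedef float __v4sf __attribute__ ((vector_size (16)));
     int ordered_eq (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comieq (a, b);
     }

   The SImode pseudo is cleared first so that setting its QImode strict
   lowpart from the flags register yields a clean 0 or 1.  */
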
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
                                               arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
                                               arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
                                               arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
                                               arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      mode0 = insn_data[icode].operand[0].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                     gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                       gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (
                       gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                     gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

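/* Illustration (32-bit case): forcing an SImode register to memory
   emits a push,

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI operand))

   and the returned slot is (mem:SI (reg:SI sp)), i.e. the word just
   pushed.  */
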
/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or an add instruction if registers are
         available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

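/* Illustration: the matching deallocation for an SImode slot is

     (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 4)))

   which assembles as an lea (leaving the flags untouched) unless
   peephole2 replaces it.  */
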
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

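/* Example: for X = (const_double:DF 1.0) and CLASS = FLOAT_REGS,
   standard_80387_constant_p recognizes the fld1 constant and the class
   is returned unchanged; the same constant with an SSE class yields
   NO_REGS, forcing the value to the constant pool.  */
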
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}


/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general purpose register
     we may emit multiple stores followed by a single load, causing a
     memory-size-mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between the SSE/MMX and integer units are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
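
/* Illustrative sketch only, not used by the compiler: the
   secondary-memory branch above is simply store cost + load cost, plus a
   flat 20 when the source class needs more hard registers than the
   destination (the memory-size-mismatch case).  */
static int
example_move_cost_via_memory (store_cost, load_cost, nregs_src, nregs_dst)
     int store_cost, load_cost, nregs_src, nregs_dst;
{
  int add_cost = (nregs_src > nregs_dst) ? 20 : 0;
  return store_cost + load_cost + add_cost;
}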

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags, and only flags, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In the future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
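
/* Illustrative sketch only, not used by the compiler: the QImode tail
   above over plain ints.  Only the first four integer registers have byte
   subregisters on IA-32; elsewhere QImode is accepted in 64-bit mode,
   once reload has run, or when partial register stalls are not being
   modelled.  */
static int
example_qi_regno_ok (regno, is_qimode, bits64, reload_done, partial_stall)
     int regno, is_qimode, bits64, reload_done, partial_stall;
{
  if (regno < 4 || !is_qimode || bits64)
    return 1;
  return reload_done || !partial_stall;
}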

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode values in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved
	 as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}
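
/* Illustrative sketch only, not used by the compiler: the default case
   above scales the 32-bit load/store cost by the number of 4-byte words,
   e.g. with a hypothetical int_load[2] of 2, an 8-byte DImode load costs
   2 * 8 / 4 = 4.  */
static int
example_wide_int_move_cost (per_word_cost, mode_size_bytes)
     int per_word_cost, mode_size_bytes;
{
  return per_word_cost * mode_size_bytes / 4;
}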

#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
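
/* A worked example, hedged: for a constructor named init_foo (a
   hypothetical name), the routine above emits

	pushl $init_foo

   into the init section; the SVR3 startup code is then expected to
   consume the pushed addresses via DO_GLOBAL_CTORS_BODY.  */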

/* Order the registers for the register allocator.  */
void
x86_order_regs_for_local_alloc ()
{
   int pos = 0;
   int i;

   /* First allocate the call-used (local) general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* Then the call-saved (global) general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
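
/* Illustrative summary, not used by the compiler: the resulting order
   tries cheap, clobber-free choices first, roughly

     call-used integer regs, call-saved integer regs,
     x87 regs (when the x87 does the FP math), SSE regs,
     x87 regs (otherwise), MMX regs,

   with the unallocatable tail of reg_alloc_order zero-filled.  */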

void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
	{
	  fprintf (file, "\tjmp *");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "@GOTPCREL(%%rip)\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
  else
    {
      if (parm)
	xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
	{
	  xops[0] = pic_offset_table_rtx;
	  xops[1] = gen_label_rtx ();
	  xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

	  if (ix86_regparm > 2)
	    abort ();
	  output_asm_insn ("push{l}\t%0", xops);
	  output_asm_insn ("call\t%P1", xops);
	  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
	  output_asm_insn ("pop{l}\t%0", xops);
	  output_asm_insn
	    ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
	  xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
	  output_asm_insn
	    ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
	  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
	  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");