1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
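/* Illustration (not part of the original source): the per-mode entries in the
   cost tables below are typically fetched as

     ix86_cost->mult_init[MODE_INDEX (mode)]
     ix86_cost->divide[MODE_INDEX (mode)]

   (field names as assumed from struct processor_costs in i386.h), so
   QImode..DImode map to slots 0..3 and any other mode falls into the
   catch-all slot 4.  */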
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
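/* Worked example: under the assumption above, an add is 2 bytes, so
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1); the size table below is
   therefore expressed on the same scale as the per-processor speed tables.  */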
71 static const
72 struct processor_costs size_cost = { /* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
90 0, /* "large" insn */
91 2, /* MOVE_RATIO */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
115 2, /* Branch cost */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
122 };
124 /* Processor costs (relative to an add) */
125 static const
126 struct processor_costs i386_cost = { /* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
145 3, /* MOVE_RATIO */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
169 1, /* Branch cost */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
176 };
178 static const
179 struct processor_costs i486_cost = { /* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
198 3, /* MOVE_RATIO */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
222 1, /* Branch cost */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
229 };
231 static const
232 struct processor_costs pentium_cost = {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
251 6, /* MOVE_RATIO */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
275 2, /* Branch cost */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
282 };
284 static const
285 struct processor_costs pentiumpro_cost = {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
304 6, /* MOVE_RATIO */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
328 2, /* Branch cost */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
335 };
337 static const
338 struct processor_costs geode_cost = {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
357 4, /* MOVE_RATIO */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
382 1, /* Branch cost */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
389 };
391 static const
392 struct processor_costs k6_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
411 4, /* MOVE_RATIO */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
435 1, /* Branch cost */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
442 };
444 static const
445 struct processor_costs athlon_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
464 9, /* MOVE_RATIO */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
488 5, /* Branch cost */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
495 };
497 static const
498 struct processor_costs k8_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
517 9, /* MOVE_RATIO */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
540 /* New AMD processors never drop prefetches; if they cannot be performed
541 immediately, they are queued. We set the number of simultaneous prefetches
542 to a large constant to reflect this (it is probably not a good idea to leave
543 the number of prefetches completely unlimited, as their execution also
544 takes some time). */
545 100, /* number of parallel prefetches */
546 5, /* Branch cost */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 };
555 static const
556 struct processor_costs pentium4_cost = {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
575 6, /* MOVE_RATIO */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
599 2, /* Branch cost */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
606 };
608 static const
609 struct processor_costs nocona_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
628 17, /* MOVE_RATIO */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
652 1, /* Branch cost */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
659 };
661 static const
662 struct processor_costs core2_cost = {
663 COSTS_N_INSNS (1), /* cost of an add instruction */
664 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
665 COSTS_N_INSNS (1), /* variable shift costs */
666 COSTS_N_INSNS (1), /* constant shift costs */
667 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
668 COSTS_N_INSNS (3), /* HI */
669 COSTS_N_INSNS (3), /* SI */
670 COSTS_N_INSNS (3), /* DI */
671 COSTS_N_INSNS (3)}, /* other */
672 0, /* cost of multiply per each bit set */
673 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
674 COSTS_N_INSNS (22), /* HI */
675 COSTS_N_INSNS (22), /* SI */
676 COSTS_N_INSNS (22), /* DI */
677 COSTS_N_INSNS (22)}, /* other */
678 COSTS_N_INSNS (1), /* cost of movsx */
679 COSTS_N_INSNS (1), /* cost of movzx */
680 8, /* "large" insn */
681 16, /* MOVE_RATIO */
682 2, /* cost for loading QImode using movzbl */
683 {6, 6, 6}, /* cost of loading integer registers
684 in QImode, HImode and SImode.
685 Relative to reg-reg move (2). */
686 {4, 4, 4}, /* cost of storing integer registers */
687 2, /* cost of reg,reg fld/fst */
688 {6, 6, 6}, /* cost of loading fp registers
689 in SFmode, DFmode and XFmode */
690 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {6, 6}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {4, 4}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {6, 6, 6}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {4, 4, 4}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 2, /* MMX or SSE register to integer */
702 128, /* size of prefetch block */
703 8, /* number of parallel prefetches */
704 3, /* Branch cost */
705 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (1), /* cost of FABS instruction. */
709 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
711 };
713 /* Generic64 should produce code tuned for Nocona and K8. */
714 static const
715 struct processor_costs generic64_cost = {
716 COSTS_N_INSNS (1), /* cost of an add instruction */
717 /* On all chips taken into consideration, lea takes 2 cycles or more. With
718 this cost, however, our current implementation of synth_mult results in
719 the use of unnecessary temporary registers, causing regressions on several
720 SPECfp benchmarks. */
721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
722 COSTS_N_INSNS (1), /* variable shift costs */
723 COSTS_N_INSNS (1), /* constant shift costs */
724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
725 COSTS_N_INSNS (4), /* HI */
726 COSTS_N_INSNS (3), /* SI */
727 COSTS_N_INSNS (4), /* DI */
728 COSTS_N_INSNS (2)}, /* other */
729 0, /* cost of multiply per each bit set */
730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
731 COSTS_N_INSNS (26), /* HI */
732 COSTS_N_INSNS (42), /* SI */
733 COSTS_N_INSNS (74), /* DI */
734 COSTS_N_INSNS (74)}, /* other */
735 COSTS_N_INSNS (1), /* cost of movsx */
736 COSTS_N_INSNS (1), /* cost of movzx */
737 8, /* "large" insn */
738 17, /* MOVE_RATIO */
739 4, /* cost for loading QImode using movzbl */
740 {4, 4, 4}, /* cost of loading integer registers
741 in QImode, HImode and SImode.
742 Relative to reg-reg move (2). */
743 {4, 4, 4}, /* cost of storing integer registers */
744 4, /* cost of reg,reg fld/fst */
745 {12, 12, 12}, /* cost of loading fp registers
746 in SFmode, DFmode and XFmode */
747 {6, 6, 8}, /* cost of storing fp registers
748 in SFmode, DFmode and XFmode */
749 2, /* cost of moving MMX register */
750 {8, 8}, /* cost of loading MMX registers
751 in SImode and DImode */
752 {8, 8}, /* cost of storing MMX registers
753 in SImode and DImode */
754 2, /* cost of moving SSE register */
755 {8, 8, 8}, /* cost of loading SSE registers
756 in SImode, DImode and TImode */
757 {8, 8, 8}, /* cost of storing SSE registers
758 in SImode, DImode and TImode */
759 5, /* MMX or SSE register to integer */
760 64, /* size of prefetch block */
761 6, /* number of parallel prefetches */
762 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
763 value is increased to the perhaps more appropriate value of 5. */
764 3, /* Branch cost */
765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
771 };
773 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
774 static const
775 struct processor_costs generic32_cost = {
776 COSTS_N_INSNS (1), /* cost of an add instruction */
777 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
778 COSTS_N_INSNS (1), /* variable shift costs */
779 COSTS_N_INSNS (1), /* constant shift costs */
780 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
781 COSTS_N_INSNS (4), /* HI */
782 COSTS_N_INSNS (3), /* SI */
783 COSTS_N_INSNS (4), /* DI */
784 COSTS_N_INSNS (2)}, /* other */
785 0, /* cost of multiply per each bit set */
786 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
787 COSTS_N_INSNS (26), /* HI */
788 COSTS_N_INSNS (42), /* SI */
789 COSTS_N_INSNS (74), /* DI */
790 COSTS_N_INSNS (74)}, /* other */
791 COSTS_N_INSNS (1), /* cost of movsx */
792 COSTS_N_INSNS (1), /* cost of movzx */
793 8, /* "large" insn */
794 17, /* MOVE_RATIO */
795 4, /* cost for loading QImode using movzbl */
796 {4, 4, 4}, /* cost of loading integer registers
797 in QImode, HImode and SImode.
798 Relative to reg-reg move (2). */
799 {4, 4, 4}, /* cost of storing integer registers */
800 4, /* cost of reg,reg fld/fst */
801 {12, 12, 12}, /* cost of loading fp registers
802 in SFmode, DFmode and XFmode */
803 {6, 6, 8}, /* cost of storing fp registers
804 in SFmode, DFmode and XFmode */
805 2, /* cost of moving MMX register */
806 {8, 8}, /* cost of loading MMX registers
807 in SImode and DImode */
808 {8, 8}, /* cost of storing MMX registers
809 in SImode and DImode */
810 2, /* cost of moving SSE register */
811 {8, 8, 8}, /* cost of loading SSE registers
812 in SImode, DImode and TImode */
813 {8, 8, 8}, /* cost of storing SSE registers
814 in SImode, DImode and TImode */
815 5, /* MMX or SSE register to integer */
816 64, /* size of prefetch block */
817 6, /* number of parallel prefetches */
818 3, /* Branch cost */
819 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
820 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
821 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
822 COSTS_N_INSNS (8), /* cost of FABS instruction. */
823 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
824 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
825 };
827 const struct processor_costs *ix86_cost = &pentium_cost;
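/* Note (added for clarity): this default only matters until options are
   processed; override_options later re-points ix86_cost at the table that
   matches the selected -mtune processor, e.g. &k8_cost for -mtune=k8.  */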
829 /* Processor feature/optimization bitmasks. */
830 #define m_386 (1<<PROCESSOR_I386)
831 #define m_486 (1<<PROCESSOR_I486)
832 #define m_PENT (1<<PROCESSOR_PENTIUM)
833 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
834 #define m_GEODE (1<<PROCESSOR_GEODE)
835 #define m_K6_GEODE (m_K6 | m_GEODE)
836 #define m_K6 (1<<PROCESSOR_K6)
837 #define m_ATHLON (1<<PROCESSOR_ATHLON)
838 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
839 #define m_K8 (1<<PROCESSOR_K8)
840 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
841 #define m_NOCONA (1<<PROCESSOR_NOCONA)
842 #define m_CORE2 (1<<PROCESSOR_CORE2)
843 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
844 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
845 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
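/* Illustrative sketch (not part of the original source): each of the tuning
   variables below is a bitmask over these processor bits, and a feature is
   normally tested by masking with the bit of the processor being tuned for,
   roughly

     if (x86_use_leave & (1 << ix86_tune))
       ... emit leave in the epilogue ...

   which is what the TARGET_* wrapper macros in i386.h expand to.  */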
847 /* Generic instruction choice should be common subset of supported CPUs
848 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
850 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
851 Generic64 seems like a good code-size tradeoff. We can't enable it for 32-bit
852 generic because it does not work well with PPro based chips. */
853 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
854 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
855 const int x86_zero_extend_with_and = m_486 | m_PENT;
856 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
857 const int x86_double_with_add = ~m_386;
858 const int x86_use_bit_test = m_386;
859 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
860 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
861 const int x86_3dnow_a = m_ATHLON_K8;
862 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
863 /* Branch hints were put in the P4 based on simulation results. But
864 after the P4 was made, no performance benefit was observed from
865 branch hints; they also increase code size. As a result,
866 icc never generates branch hints. */
867 const int x86_branch_hints = 0;
868 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
869 /* We probably ought to watch for partial register stalls on the Generic32
870 compilation setting as well. However, in the current implementation
871 partial register stalls are not eliminated very well - they can
872 be introduced via subregs synthesized by combine and can happen
873 in caller/callee saving sequences.
874 Because this option pays back little on PPro based chips and conflicts
875 with the partial register dependencies used by Athlon/P4 based chips, it is
876 better to leave it off for generic32 for now. */
877 const int x86_partial_reg_stall = m_PPRO;
878 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
879 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
880 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
881 const int x86_use_mov0 = m_K6;
882 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
883 const int x86_read_modify_write = ~m_PENT;
884 const int x86_read_modify = ~(m_PENT | m_PPRO);
885 const int x86_split_long_moves = m_PPRO;
886 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
887 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
888 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
889 const int x86_qimode_math = ~(0);
890 const int x86_promote_qi_regs = 0;
891 /* On PPro this flag is meant to avoid partial register stalls. Just like
892 x86_partial_reg_stall, this option might be considered for Generic32
893 if our scheme for avoiding partial stalls were more effective. */
894 const int x86_himode_math = ~(m_PPRO);
895 const int x86_promote_hi_regs = m_PPRO;
896 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
897 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
898 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
899 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
900 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
901 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
902 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
903 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
904 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
905 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
906 const int x86_shift1 = ~m_486;
907 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
908 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
909 that treat 128bit SSE registers as single units and K8 based chips that
910 split SSE registers into two 64bit halves.
911 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
912 to allow register renaming on 128bit SSE units, but usually results in one
913 extra microop on 64bit SSE units. Experimental results show that disabling
914 this option on P4 brings over a 20% SPECfp regression, while enabling it on
915 K8 brings roughly a 2.4% regression that can be partly masked by careful
916 scheduling of moves. */
917 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
918 /* Set for machines where the type and dependencies are resolved on SSE
919 register parts instead of whole registers, so we may maintain just the
920 lower part of scalar values in the proper format, leaving the upper part
921 undefined. */
922 const int x86_sse_split_regs = m_ATHLON_K8;
923 const int x86_sse_typeless_stores = m_ATHLON_K8;
924 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
925 const int x86_use_ffreep = m_ATHLON_K8;
926 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
927 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
929 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
930 integer data in xmm registers, which results in pretty abysmal code. */
931 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
933 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
934 /* Some CPU cores are not able to predict more than 4 branch instructions in
935 a 16-byte window. */
936 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
937 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
938 const int x86_use_bt = m_ATHLON_K8;
939 /* Compare and exchange was added for 80486. */
940 const int x86_cmpxchg = ~m_386;
941 /* Compare and exchange 8 bytes was added for pentium. */
942 const int x86_cmpxchg8b = ~(m_386 | m_486);
943 /* Compare and exchange 16 bytes was added for nocona. */
944 const int x86_cmpxchg16b = m_NOCONA;
945 /* Exchange and add was added for 80486. */
946 const int x86_xadd = ~m_386;
947 /* Byteswap was added for 80486. */
948 const int x86_bswap = ~m_386;
949 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
951 /* In case the average insn count for a single function invocation is
952 lower than this constant, emit fast (but longer) prologue and
953 epilogue code. */
954 #define FAST_PROLOGUE_INSN_COUNT 20
956 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
957 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
958 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
959 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
961 /* Array of the smallest class containing reg number REGNO, indexed by
962 REGNO. Used by REGNO_REG_CLASS in i386.h. */
964 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
965 {
966 /* ax, dx, cx, bx */
967 AREG, DREG, CREG, BREG,
968 /* si, di, bp, sp */
969 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
970 /* FP registers */
971 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
972 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
973 /* arg pointer */
974 NON_Q_REGS,
975 /* flags, fpsr, fpcr, dirflag, frame */
976 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
977 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
978 SSE_REGS, SSE_REGS,
979 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
980 MMX_REGS, MMX_REGS,
981 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
982 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
983 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
984 SSE_REGS, SSE_REGS,
985 };
987 /* The "default" register map used in 32bit mode. */
989 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
990 {
991 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
992 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
993 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
994 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
995 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
996 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
997 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
998 };
1000 static int const x86_64_int_parameter_registers[6] =
1001 {
1002 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1003 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1004 };
1006 static int const x86_64_int_return_registers[4] =
1007 {
1008 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1009 };
1011 /* The "default" register map used in 64bit mode. */
1012 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1013 {
1014 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1015 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1016 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1017 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1018 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1019 8,9,10,11,12,13,14,15, /* extended integer registers */
1020 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1021 };
1023 /* Define the register numbers to be used in Dwarf debugging information.
1024 The SVR4 reference port C compiler uses the following register numbers
1025 in its Dwarf output code:
1026 0 for %eax (gcc regno = 0)
1027 1 for %ecx (gcc regno = 2)
1028 2 for %edx (gcc regno = 1)
1029 3 for %ebx (gcc regno = 3)
1030 4 for %esp (gcc regno = 7)
1031 5 for %ebp (gcc regno = 6)
1032 6 for %esi (gcc regno = 4)
1033 7 for %edi (gcc regno = 5)
1034 The following three DWARF register numbers are never generated by
1035 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1036 believes these numbers have these meanings.
1037 8 for %eip (no gcc equivalent)
1038 9 for %eflags (gcc regno = 17)
1039 10 for %trapno (no gcc equivalent)
1040 It is not at all clear how we should number the FP stack registers
1041 for the x86 architecture. If the version of SDB on x86/svr4 were
1042 a bit less brain dead with respect to floating-point then we would
1043 have a precedent to follow with respect to DWARF register numbers
1044 for x86 FP registers, but the SDB on x86/svr4 is so completely
1045 broken with respect to FP registers that it is hardly worth thinking
1046 of it as something to strive for compatibility with.
1047 The version of x86/svr4 SDB I have at the moment does (partially)
1048 seem to believe that DWARF register number 11 is associated with
1049 the x86 register %st(0), but that's about all. Higher DWARF
1050 register numbers don't seem to be associated with anything in
1051 particular, and even for DWARF regno 11, SDB only seems to under-
1052 stand that it should say that a variable lives in %st(0) (when
1053 asked via an `=' command) if we said it was in DWARF regno 11,
1054 but SDB still prints garbage when asked for the value of the
1055 variable in question (via a `/' command).
1056 (Also note that the labels SDB prints for various FP stack regs
1057 when doing an `x' command are all wrong.)
1058 Note that these problems generally don't affect the native SVR4
1059 C compiler because it doesn't allow the use of -O with -g and
1060 because when it is *not* optimizing, it allocates a memory
1061 location for each floating-point variable, and the memory
1062 location is what gets described in the DWARF AT_location
1063 attribute for the variable in question.
1064 Regardless of the severe mental illness of the x86/svr4 SDB, we
1065 do something sensible here and we use the following DWARF
1066 register numbers. Note that these are all stack-top-relative
1067 numbers.
1068 11 for %st(0) (gcc regno = 8)
1069 12 for %st(1) (gcc regno = 9)
1070 13 for %st(2) (gcc regno = 10)
1071 14 for %st(3) (gcc regno = 11)
1072 15 for %st(4) (gcc regno = 12)
1073 16 for %st(5) (gcc regno = 13)
1074 17 for %st(6) (gcc regno = 14)
1075 18 for %st(7) (gcc regno = 15)
1076 */
1077 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1078 {
1079 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1080 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1081 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1082 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1083 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1084 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1085 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1086 };
1088 /* Test and compare insns in i386.md store the information needed to
1089 generate branch and scc insns here. */
1091 rtx ix86_compare_op0 = NULL_RTX;
1092 rtx ix86_compare_op1 = NULL_RTX;
1093 rtx ix86_compare_emitted = NULL_RTX;
1095 /* Size of the register save area. */
1096 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
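/* For reference (assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8): this evaluates to
   6*8 + 8*16 = 176 bytes, the size of the ABI register save area used
   for varargs.  */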
1098 /* Define the structure for the machine field in struct function. */
1100 struct stack_local_entry GTY(())
1101 {
1102 unsigned short mode;
1103 unsigned short n;
1104 rtx rtl;
1105 struct stack_local_entry *next;
1106 };
1108 /* Structure describing stack frame layout.
1109 Stack grows downward:
1111 [arguments]
1112 <- ARG_POINTER
1113 saved pc
1115 saved frame pointer if frame_pointer_needed
1116 <- HARD_FRAME_POINTER
1117 [saved regs]
1119 [padding1] \
1121 [va_arg registers] (
1122 > to_allocate <- FRAME_POINTER
1123 [frame] (
1125 [padding2] /
1126 */
1127 struct ix86_frame
1128 {
1129 int nregs;
1130 int padding1;
1131 int va_arg_size;
1132 HOST_WIDE_INT frame;
1133 int padding2;
1134 int outgoing_arguments_size;
1135 int red_zone_size;
1137 HOST_WIDE_INT to_allocate;
1138 /* The offsets relative to ARG_POINTER. */
1139 HOST_WIDE_INT frame_pointer_offset;
1140 HOST_WIDE_INT hard_frame_pointer_offset;
1141 HOST_WIDE_INT stack_pointer_offset;
1143 /* When save_regs_using_mov is set, emit prologue using
1144 move instead of push instructions. */
1145 bool save_regs_using_mov;
1146 };
1148 /* Code model option. */
1149 enum cmodel ix86_cmodel;
1150 /* Asm dialect. */
1151 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1152 /* TLS dialects. */
1153 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1155 /* Which unit we are generating floating point math for. */
1156 enum fpmath_unit ix86_fpmath;
1158 /* Which CPU we are scheduling for. */
1159 enum processor_type ix86_tune;
1160 /* Which instruction set architecture to use. */
1161 enum processor_type ix86_arch;
1163 /* True if the SSE prefetch instruction is not a NOP. */
1164 int x86_prefetch_sse;
1166 /* ix86_regparm_string as a number */
1167 static int ix86_regparm;
1169 /* -mstackrealign option */
1170 extern int ix86_force_align_arg_pointer;
1171 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1173 /* Preferred alignment for stack boundary in bits. */
1174 unsigned int ix86_preferred_stack_boundary;
1176 /* Values 1-5: see jump.c */
1177 int ix86_branch_cost;
1179 /* Variables which are this size or smaller are put in the data/bss
1180 or ldata/lbss sections. */
1182 int ix86_section_threshold = 65536;
1184 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1185 char internal_label_prefix[16];
1186 int internal_label_prefix_len;
1188 static bool ix86_handle_option (size_t, const char *, int);
1189 static void output_pic_addr_const (FILE *, rtx, int);
1190 static void put_condition_code (enum rtx_code, enum machine_mode,
1191 int, int, FILE *);
1192 static const char *get_some_local_dynamic_name (void);
1193 static int get_some_local_dynamic_name_1 (rtx *, void *);
1194 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1195 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1196 rtx *);
1197 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1198 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1199 enum machine_mode);
1200 static rtx get_thread_pointer (int);
1201 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1202 static void get_pc_thunk_name (char [32], unsigned int);
1203 static rtx gen_push (rtx);
1204 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1205 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1206 static struct machine_function * ix86_init_machine_status (void);
1207 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1208 static int ix86_nsaved_regs (void);
1209 static void ix86_emit_save_regs (void);
1210 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1211 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1212 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1213 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1214 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1215 static rtx ix86_expand_aligntest (rtx, int);
1216 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1217 static int ix86_issue_rate (void);
1218 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1219 static int ia32_multipass_dfa_lookahead (void);
1220 static void ix86_init_mmx_sse_builtins (void);
1221 static rtx x86_this_parameter (tree);
1222 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1223 HOST_WIDE_INT, tree);
1224 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1225 static void x86_file_start (void);
1226 static void ix86_reorg (void);
1227 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1228 static tree ix86_build_builtin_va_list (void);
1229 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1230 tree, int *, int);
1231 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1232 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1233 static bool ix86_vector_mode_supported_p (enum machine_mode);
1235 static int ix86_address_cost (rtx);
1236 static bool ix86_cannot_force_const_mem (rtx);
1237 static rtx ix86_delegitimize_address (rtx);
1239 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1241 struct builtin_description;
1242 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1243 tree, rtx);
1244 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1245 tree, rtx);
1246 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1247 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1248 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1249 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1250 static rtx safe_vector_operand (rtx, enum machine_mode);
1251 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1252 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1253 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1254 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1255 static int ix86_fp_comparison_cost (enum rtx_code code);
1256 static unsigned int ix86_select_alt_pic_regnum (void);
1257 static int ix86_save_reg (unsigned int, int);
1258 static void ix86_compute_frame_layout (struct ix86_frame *);
1259 static int ix86_comp_type_attributes (tree, tree);
1260 static int ix86_function_regparm (tree, tree);
1261 const struct attribute_spec ix86_attribute_table[];
1262 static bool ix86_function_ok_for_sibcall (tree, tree);
1263 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1264 static int ix86_value_regno (enum machine_mode, tree, tree);
1265 static bool contains_128bit_aligned_vector_p (tree);
1266 static rtx ix86_struct_value_rtx (tree, int);
1267 static bool ix86_ms_bitfield_layout_p (tree);
1268 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1269 static int extended_reg_mentioned_1 (rtx *, void *);
1270 static bool ix86_rtx_costs (rtx, int, int, int *);
1271 static int min_insn_size (rtx);
1272 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1273 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1274 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1275 tree, bool);
1276 static void ix86_init_builtins (void);
1277 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1278 static const char *ix86_mangle_fundamental_type (tree);
1279 static tree ix86_stack_protect_fail (void);
1280 static rtx ix86_internal_arg_pointer (void);
1281 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1283 /* This function is only used on Solaris. */
1284 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1285 ATTRIBUTE_UNUSED;
1287 /* Register class used for passing a given 64bit part of the argument.
1288 These represent classes as documented by the PS ABI, with the exception
1289 of the SSESF and SSEDF classes, which are basically the SSE class, except that
1290 gcc uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
1292 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1293 whenever possible (the upper half does contain padding).
1294 */
1295 enum x86_64_reg_class
1296 {
1297 X86_64_NO_CLASS,
1298 X86_64_INTEGER_CLASS,
1299 X86_64_INTEGERSI_CLASS,
1300 X86_64_SSE_CLASS,
1301 X86_64_SSESF_CLASS,
1302 X86_64_SSEDF_CLASS,
1303 X86_64_SSEUP_CLASS,
1304 X86_64_X87_CLASS,
1305 X86_64_X87UP_CLASS,
1306 X86_64_COMPLEX_X87_CLASS,
1307 X86_64_MEMORY_CLASS
1308 };
1309 static const char * const x86_64_reg_class_name[] = {
1310 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1311 "sseup", "x87", "x87up", "cplx87", "no"
1314 #define MAX_CLASSES 4
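/* Examples (illustrative only): classify_argument assigns a plain SImode
   scalar the X86_64_INTEGERSI_CLASS and a plain DFmode scalar the
   X86_64_SSEDF_CLASS, so the former is passed in a general register with an
   SImode move and the latter in an SSE register with a DFmode move, exactly
   the cheaper-move games described above.  */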
1316 /* Table of constants used by fldpi, fldln2, etc.... */
1317 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1318 static bool ext_80387_constants_init = 0;
1319 static void init_ext_80387_constants (void);
1320 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1321 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1322 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1323 static section *x86_64_elf_select_section (tree decl, int reloc,
1324 unsigned HOST_WIDE_INT align)
1325 ATTRIBUTE_UNUSED;
1327 /* Initialize the GCC target structure. */
1328 #undef TARGET_ATTRIBUTE_TABLE
1329 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1331 # undef TARGET_MERGE_DECL_ATTRIBUTES
1332 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1333 #endif
1335 #undef TARGET_COMP_TYPE_ATTRIBUTES
1336 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1338 #undef TARGET_INIT_BUILTINS
1339 #define TARGET_INIT_BUILTINS ix86_init_builtins
1340 #undef TARGET_EXPAND_BUILTIN
1341 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1343 #undef TARGET_ASM_FUNCTION_EPILOGUE
1344 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1346 #undef TARGET_ENCODE_SECTION_INFO
1347 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1348 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1349 #else
1350 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1351 #endif
1353 #undef TARGET_ASM_OPEN_PAREN
1354 #define TARGET_ASM_OPEN_PAREN ""
1355 #undef TARGET_ASM_CLOSE_PAREN
1356 #define TARGET_ASM_CLOSE_PAREN ""
1358 #undef TARGET_ASM_ALIGNED_HI_OP
1359 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1360 #undef TARGET_ASM_ALIGNED_SI_OP
1361 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1362 #ifdef ASM_QUAD
1363 #undef TARGET_ASM_ALIGNED_DI_OP
1364 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1365 #endif
1367 #undef TARGET_ASM_UNALIGNED_HI_OP
1368 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1369 #undef TARGET_ASM_UNALIGNED_SI_OP
1370 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1371 #undef TARGET_ASM_UNALIGNED_DI_OP
1372 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1374 #undef TARGET_SCHED_ADJUST_COST
1375 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1376 #undef TARGET_SCHED_ISSUE_RATE
1377 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1378 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1379 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1380 ia32_multipass_dfa_lookahead
1382 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1383 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1385 #ifdef HAVE_AS_TLS
1386 #undef TARGET_HAVE_TLS
1387 #define TARGET_HAVE_TLS true
1388 #endif
1389 #undef TARGET_CANNOT_FORCE_CONST_MEM
1390 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1391 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1392 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1394 #undef TARGET_DELEGITIMIZE_ADDRESS
1395 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1397 #undef TARGET_MS_BITFIELD_LAYOUT_P
1398 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1400 #if TARGET_MACHO
1401 #undef TARGET_BINDS_LOCAL_P
1402 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1403 #endif
1405 #undef TARGET_ASM_OUTPUT_MI_THUNK
1406 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1407 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1408 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1410 #undef TARGET_ASM_FILE_START
1411 #define TARGET_ASM_FILE_START x86_file_start
1413 #undef TARGET_DEFAULT_TARGET_FLAGS
1414 #define TARGET_DEFAULT_TARGET_FLAGS \
1415 (TARGET_DEFAULT \
1416 | TARGET_64BIT_DEFAULT \
1417 | TARGET_SUBTARGET_DEFAULT \
1418 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1420 #undef TARGET_HANDLE_OPTION
1421 #define TARGET_HANDLE_OPTION ix86_handle_option
1423 #undef TARGET_RTX_COSTS
1424 #define TARGET_RTX_COSTS ix86_rtx_costs
1425 #undef TARGET_ADDRESS_COST
1426 #define TARGET_ADDRESS_COST ix86_address_cost
1428 #undef TARGET_FIXED_CONDITION_CODE_REGS
1429 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1430 #undef TARGET_CC_MODES_COMPATIBLE
1431 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1433 #undef TARGET_MACHINE_DEPENDENT_REORG
1434 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1436 #undef TARGET_BUILD_BUILTIN_VA_LIST
1437 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1439 #undef TARGET_MD_ASM_CLOBBERS
1440 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1442 #undef TARGET_PROMOTE_PROTOTYPES
1443 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1444 #undef TARGET_STRUCT_VALUE_RTX
1445 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1446 #undef TARGET_SETUP_INCOMING_VARARGS
1447 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1448 #undef TARGET_MUST_PASS_IN_STACK
1449 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1450 #undef TARGET_PASS_BY_REFERENCE
1451 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1452 #undef TARGET_INTERNAL_ARG_POINTER
1453 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1454 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1455 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1457 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1458 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1460 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1461 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1463 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1464 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1466 #ifdef HAVE_AS_TLS
1467 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1468 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1469 #endif
1471 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1472 #undef TARGET_INSERT_ATTRIBUTES
1473 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1474 #endif
1476 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1477 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1479 #undef TARGET_STACK_PROTECT_FAIL
1480 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1482 #undef TARGET_FUNCTION_VALUE
1483 #define TARGET_FUNCTION_VALUE ix86_function_value
1485 struct gcc_target targetm = TARGET_INITIALIZER;
1488 /* The svr4 ABI for the i386 says that records and unions are returned
1489 in memory. */
1490 #ifndef DEFAULT_PCC_STRUCT_RETURN
1491 #define DEFAULT_PCC_STRUCT_RETURN 1
1492 #endif
1494 /* Implement TARGET_HANDLE_OPTION. */
1496 static bool
1497 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1499 switch (code)
1501 case OPT_m3dnow:
1502 if (!value)
1504 target_flags &= ~MASK_3DNOW_A;
1505 target_flags_explicit |= MASK_3DNOW_A;
1507 return true;
1509 case OPT_mmmx:
1510 if (!value)
1512 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1513 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1515 return true;
1517 case OPT_msse:
1518 if (!value)
1520 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1521 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1523 return true;
1525 case OPT_msse2:
1526 if (!value)
1528 target_flags &= ~MASK_SSE3;
1529 target_flags_explicit |= MASK_SSE3;
1531 return true;
1533 default:
1534 return true;
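/* Note the implication chain encoded above: disabling a base ISA also
   disables its extensions, so e.g. -mno-sse clears MASK_SSE2 and MASK_SSE3
   as well, and -mno-mmx clears the 3DNow! masks.  The cleared bits are also
   recorded in target_flags_explicit so that override_options will not
   silently re-enable them from -march defaults.  */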
1538 /* Sometimes certain combinations of command options do not make
1539 sense on a particular target machine. You can define a macro
1540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1541 defined, is executed once just after all the command options have
1542 been parsed.
1544 Don't use this macro to turn on various extra optimizations for
1545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1547 void
1548 override_options (void)
1550 int i;
1551 int ix86_tune_defaulted = 0;
1553 /* Comes from final.c -- no real reason to change it. */
1554 #define MAX_CODE_ALIGN 16
1556 static struct ptt
1558 const struct processor_costs *cost; /* Processor costs */
1559 const int target_enable; /* Target flags to enable. */
1560 const int target_disable; /* Target flags to disable. */
1561 const int align_loop; /* Default alignments. */
1562 const int align_loop_max_skip;
1563 const int align_jump;
1564 const int align_jump_max_skip;
1565 const int align_func;
1567 const processor_target_table[PROCESSOR_max] =
1569 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1570 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1571 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1572 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1573 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1574 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1575 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1576 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1577 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1578 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1579 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1580 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1581 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1584 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1585 static struct pta
1587 const char *const name; /* processor name or nickname. */
1588 const enum processor_type processor;
1589 const enum pta_flags
1591 PTA_SSE = 1,
1592 PTA_SSE2 = 2,
1593 PTA_SSE3 = 4,
1594 PTA_MMX = 8,
1595 PTA_PREFETCH_SSE = 16,
1596 PTA_3DNOW = 32,
1597 PTA_3DNOW_A = 64,
1598 PTA_64BIT = 128,
1599 PTA_SSSE3 = 256
1600 } flags;
1602 const processor_alias_table[] =
1604 {"i386", PROCESSOR_I386, 0},
1605 {"i486", PROCESSOR_I486, 0},
1606 {"i586", PROCESSOR_PENTIUM, 0},
1607 {"pentium", PROCESSOR_PENTIUM, 0},
1608 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1609 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1610 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1611 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1612 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1613 {"i686", PROCESSOR_PENTIUMPRO, 0},
1614 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1615 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1616 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1617 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1618 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1619 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1620 | PTA_MMX | PTA_PREFETCH_SSE},
1621 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1622 | PTA_MMX | PTA_PREFETCH_SSE},
1623 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1624 | PTA_MMX | PTA_PREFETCH_SSE},
1625 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1626 | PTA_MMX | PTA_PREFETCH_SSE},
1627 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
1628 | PTA_64BIT | PTA_MMX
1629 | PTA_PREFETCH_SSE},
1630 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1631 | PTA_3DNOW_A},
1632 {"k6", PROCESSOR_K6, PTA_MMX},
1633 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1634 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1635 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1636 | PTA_3DNOW_A},
1637 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1638 | PTA_3DNOW | PTA_3DNOW_A},
1639 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1640 | PTA_3DNOW_A | PTA_SSE},
1641 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1642 | PTA_3DNOW_A | PTA_SSE},
1643 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1644 | PTA_3DNOW_A | PTA_SSE},
1645 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1646 | PTA_SSE | PTA_SSE2 },
1647 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1648 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1649 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1650 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1651 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1652 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1653 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1654 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1655 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1656 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1659 int const pta_size = ARRAY_SIZE (processor_alias_table);
1661 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1662 SUBTARGET_OVERRIDE_OPTIONS;
1663 #endif
1665 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1666 SUBSUBTARGET_OVERRIDE_OPTIONS;
1667 #endif
1669 /* -fPIC is the default for x86_64. */
1670 if (TARGET_MACHO && TARGET_64BIT)
1671 flag_pic = 2;
1673 /* Set the default values for switches whose default depends on TARGET_64BIT
1674 in case they weren't overwritten by command line options. */
1675 if (TARGET_64BIT)
1677 /* Mach-O doesn't support omitting the frame pointer for now. */
1678 if (flag_omit_frame_pointer == 2)
1679 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1680 if (flag_asynchronous_unwind_tables == 2)
1681 flag_asynchronous_unwind_tables = 1;
1682 if (flag_pcc_struct_return == 2)
1683 flag_pcc_struct_return = 0;
1685 else
1687 if (flag_omit_frame_pointer == 2)
1688 flag_omit_frame_pointer = 0;
1689 if (flag_asynchronous_unwind_tables == 2)
1690 flag_asynchronous_unwind_tables = 0;
1691 if (flag_pcc_struct_return == 2)
1692 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1695 /* Need to check -mtune=generic first. */
1696 if (ix86_tune_string)
1698 if (!strcmp (ix86_tune_string, "generic")
1699 || !strcmp (ix86_tune_string, "i686")
1700 /* As special support for cross compilers we read -mtune=native
1701 as -mtune=generic. With native compilers we won't see
1702 -mtune=native, as it will have been changed by the driver. */
1703 || !strcmp (ix86_tune_string, "native"))
1705 if (TARGET_64BIT)
1706 ix86_tune_string = "generic64";
1707 else
1708 ix86_tune_string = "generic32";
1710 else if (!strncmp (ix86_tune_string, "generic", 7))
1711 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1713 else
1715 if (ix86_arch_string)
1716 ix86_tune_string = ix86_arch_string;
1717 if (!ix86_tune_string)
1719 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1720 ix86_tune_defaulted = 1;
1723 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1724 need to use a sensible tune option. */
1725 if (!strcmp (ix86_tune_string, "generic")
1726 || !strcmp (ix86_tune_string, "x86-64")
1727 || !strcmp (ix86_tune_string, "i686"))
1729 if (TARGET_64BIT)
1730 ix86_tune_string = "generic64";
1731 else
1732 ix86_tune_string = "generic32";
1735 if (!strcmp (ix86_tune_string, "x86-64"))
1736 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1737 "-mtune=generic instead as appropriate.");
1739 if (!ix86_arch_string)
1740 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1741 if (!strcmp (ix86_arch_string, "generic"))
1742 error ("generic CPU can be used only for -mtune= switch");
1743 if (!strncmp (ix86_arch_string, "generic", 7))
1744 error ("bad value (%s) for -march= switch", ix86_arch_string);
1746 if (ix86_cmodel_string != 0)
1748 if (!strcmp (ix86_cmodel_string, "small"))
1749 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1750 else if (!strcmp (ix86_cmodel_string, "medium"))
1751 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1752 else if (flag_pic)
1753 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1754 else if (!strcmp (ix86_cmodel_string, "32"))
1755 ix86_cmodel = CM_32;
1756 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1757 ix86_cmodel = CM_KERNEL;
1758 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1759 ix86_cmodel = CM_LARGE;
1760 else
1761 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1763 else
1765 ix86_cmodel = CM_32;
1766 if (TARGET_64BIT)
1767 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1769 if (ix86_asm_string != 0)
1771 if (! TARGET_MACHO
1772 && !strcmp (ix86_asm_string, "intel"))
1773 ix86_asm_dialect = ASM_INTEL;
1774 else if (!strcmp (ix86_asm_string, "att"))
1775 ix86_asm_dialect = ASM_ATT;
1776 else
1777 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1779 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1780 error ("code model %qs not supported in the %s bit mode",
1781 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1782 if (ix86_cmodel == CM_LARGE)
1783 sorry ("code model %<large%> not supported yet");
1784 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1785 sorry ("%i-bit mode not compiled in",
1786 (target_flags & MASK_64BIT) ? 64 : 32);
1788 for (i = 0; i < pta_size; i++)
1789 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1791 ix86_arch = processor_alias_table[i].processor;
1792 /* Default cpu tuning to the architecture. */
1793 ix86_tune = ix86_arch;
1794 if (processor_alias_table[i].flags & PTA_MMX
1795 && !(target_flags_explicit & MASK_MMX))
1796 target_flags |= MASK_MMX;
1797 if (processor_alias_table[i].flags & PTA_3DNOW
1798 && !(target_flags_explicit & MASK_3DNOW))
1799 target_flags |= MASK_3DNOW;
1800 if (processor_alias_table[i].flags & PTA_3DNOW_A
1801 && !(target_flags_explicit & MASK_3DNOW_A))
1802 target_flags |= MASK_3DNOW_A;
1803 if (processor_alias_table[i].flags & PTA_SSE
1804 && !(target_flags_explicit & MASK_SSE))
1805 target_flags |= MASK_SSE;
1806 if (processor_alias_table[i].flags & PTA_SSE2
1807 && !(target_flags_explicit & MASK_SSE2))
1808 target_flags |= MASK_SSE2;
1809 if (processor_alias_table[i].flags & PTA_SSE3
1810 && !(target_flags_explicit & MASK_SSE3))
1811 target_flags |= MASK_SSE3;
1812 if (processor_alias_table[i].flags & PTA_SSSE3
1813 && !(target_flags_explicit & MASK_SSSE3))
1814 target_flags |= MASK_SSSE3;
1815 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1816 x86_prefetch_sse = true;
1817 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1818 error ("CPU you selected does not support x86-64 "
1819 "instruction set");
1820 break;
1823 if (i == pta_size)
1824 error ("bad value (%s) for -march= switch", ix86_arch_string);
1826 for (i = 0; i < pta_size; i++)
1827 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1829 ix86_tune = processor_alias_table[i].processor;
1830 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1832 if (ix86_tune_defaulted)
1834 ix86_tune_string = "x86-64";
1835 for (i = 0; i < pta_size; i++)
1836 if (! strcmp (ix86_tune_string,
1837 processor_alias_table[i].name))
1838 break;
1839 ix86_tune = processor_alias_table[i].processor;
1841 else
1842 error ("CPU you selected does not support x86-64 "
1843 "instruction set");
1845 /* Intel CPUs have always interpreted SSE prefetch instructions as
1846 NOPs; so, we can enable SSE prefetch instructions even when
1847 -mtune (rather than -march) points us to a processor that has them.
1848 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1849 higher processors. */
1850 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1851 x86_prefetch_sse = true;
1852 break;
1854 if (i == pta_size)
1855 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1857 if (optimize_size)
1858 ix86_cost = &size_cost;
1859 else
1860 ix86_cost = processor_target_table[ix86_tune].cost;
1861 target_flags |= processor_target_table[ix86_tune].target_enable;
1862 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1864 /* Arrange to set up i386_stack_locals for all functions. */
1865 init_machine_status = ix86_init_machine_status;
1867 /* Validate -mregparm= value. */
1868 if (ix86_regparm_string)
1870 i = atoi (ix86_regparm_string);
1871 if (i < 0 || i > REGPARM_MAX)
1872 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1873 else
1874 ix86_regparm = i;
1876 else
1877 if (TARGET_64BIT)
1878 ix86_regparm = REGPARM_MAX;
1880 /* If the user has provided any of the -malign-* options,
1881 warn and use that value only if -falign-* is not set.
1882 Remove this code in GCC 3.2 or later. */
1883 if (ix86_align_loops_string)
1885 warning (0, "-malign-loops is obsolete, use -falign-loops");
1886 if (align_loops == 0)
1888 i = atoi (ix86_align_loops_string);
1889 if (i < 0 || i > MAX_CODE_ALIGN)
1890 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1891 else
1892 align_loops = 1 << i;
1896 if (ix86_align_jumps_string)
1898 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1899 if (align_jumps == 0)
1901 i = atoi (ix86_align_jumps_string);
1902 if (i < 0 || i > MAX_CODE_ALIGN)
1903 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1904 else
1905 align_jumps = 1 << i;
1909 if (ix86_align_funcs_string)
1911 warning (0, "-malign-functions is obsolete, use -falign-functions");
1912 if (align_functions == 0)
1914 i = atoi (ix86_align_funcs_string);
1915 if (i < 0 || i > MAX_CODE_ALIGN)
1916 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1917 else
1918 align_functions = 1 << i;
1922 /* Default align_* from the processor table. */
1923 if (align_loops == 0)
1925 align_loops = processor_target_table[ix86_tune].align_loop;
1926 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1928 if (align_jumps == 0)
1930 align_jumps = processor_target_table[ix86_tune].align_jump;
1931 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1933 if (align_functions == 0)
1935 align_functions = processor_target_table[ix86_tune].align_func;
1938 /* Validate -mbranch-cost= value, or provide default. */
1939 ix86_branch_cost = ix86_cost->branch_cost;
1940 if (ix86_branch_cost_string)
1942 i = atoi (ix86_branch_cost_string);
1943 if (i < 0 || i > 5)
1944 error ("-mbranch-cost=%d is not between 0 and 5", i);
1945 else
1946 ix86_branch_cost = i;
1948 if (ix86_section_threshold_string)
1950 i = atoi (ix86_section_threshold_string);
1951 if (i < 0)
1952 error ("-mlarge-data-threshold=%d is negative", i);
1953 else
1954 ix86_section_threshold = i;
1957 if (ix86_tls_dialect_string)
1959 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1960 ix86_tls_dialect = TLS_DIALECT_GNU;
1961 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1962 ix86_tls_dialect = TLS_DIALECT_GNU2;
1963 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1964 ix86_tls_dialect = TLS_DIALECT_SUN;
1965 else
1966 error ("bad value (%s) for -mtls-dialect= switch",
1967 ix86_tls_dialect_string);
1970 /* Keep nonleaf frame pointers. */
1971 if (flag_omit_frame_pointer)
1972 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1973 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1974 flag_omit_frame_pointer = 1;
1976 /* If we're doing fast math, we don't care about comparison order
1977 wrt NaNs. This lets us use a shorter comparison sequence. */
1978 if (flag_finite_math_only)
1979 target_flags &= ~MASK_IEEE_FP;
1981 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1982 since the insns won't need emulation. */
1983 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1984 target_flags &= ~MASK_NO_FANCY_MATH_387;
1986 /* Likewise, if the target doesn't have a 387, or we've specified
1987 software floating point, don't use 387 inline intrinsics. */
1988 if (!TARGET_80387)
1989 target_flags |= MASK_NO_FANCY_MATH_387;
1991 /* Turn on SSE3 builtins for -mssse3. */
1992 if (TARGET_SSSE3)
1993 target_flags |= MASK_SSE3;
1995 /* Turn on SSE2 builtins for -msse3. */
1996 if (TARGET_SSE3)
1997 target_flags |= MASK_SSE2;
1999 /* Turn on SSE builtins for -msse2. */
2000 if (TARGET_SSE2)
2001 target_flags |= MASK_SSE;
2003 /* Turn on MMX builtins for -msse. */
2004 if (TARGET_SSE)
2006 target_flags |= MASK_MMX & ~target_flags_explicit;
2007 x86_prefetch_sse = true;
2010 /* Turn on MMX builtins for 3Dnow. */
2011 if (TARGET_3DNOW)
2012 target_flags |= MASK_MMX;
2014 if (TARGET_64BIT)
2016 if (TARGET_ALIGN_DOUBLE)
2017 error ("-malign-double makes no sense in the 64bit mode");
2018 if (TARGET_RTD)
2019 error ("-mrtd calling convention not supported in the 64bit mode");
2021 /* Enable by default the SSE and MMX builtins. Do allow the user to
2022 explicitly disable any of these. In particular, disabling SSE and
2023 MMX for kernel code is extremely useful. */
2024 target_flags
2025 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2026 & ~target_flags_explicit);
2028 else
2030 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2031 when the programmer takes care to keep the stack from being destroyed. */
2032 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2033 target_flags |= MASK_NO_RED_ZONE;
2036 /* Validate -mpreferred-stack-boundary= value, or provide default.
2037 The default of 128 bits is for Pentium III's SSE __m128. We can't
2038 change it because of optimize_size. Otherwise, we can't mix object
2039 files compiled with -Os and -On. */
2040 ix86_preferred_stack_boundary = 128;
2041 if (ix86_preferred_stack_boundary_string)
2043 i = atoi (ix86_preferred_stack_boundary_string);
2044 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2045 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2046 TARGET_64BIT ? 4 : 2);
2047 else
2048 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
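/* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
   = 128 bits, i.e. the 16 byte alignment needed for SSE __m128 spills,
   which is also the default set above.  */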
2051 /* Accept -mx87regparm only if 80387 support is enabled. */
2052 if (TARGET_X87REGPARM
2053 && ! TARGET_80387)
2054 error ("-mx87regparm used without 80387 enabled");
2056 /* Accept -msseregparm only if at least SSE support is enabled. */
2057 if (TARGET_SSEREGPARM
2058 && ! TARGET_SSE)
2059 error ("-msseregparm used without SSE enabled");
2061 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2063 if (ix86_fpmath_string != 0)
2065 if (! strcmp (ix86_fpmath_string, "387"))
2066 ix86_fpmath = FPMATH_387;
2067 else if (! strcmp (ix86_fpmath_string, "sse"))
2069 if (!TARGET_SSE)
2071 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2072 ix86_fpmath = FPMATH_387;
2074 else
2075 ix86_fpmath = FPMATH_SSE;
2077 else if (! strcmp (ix86_fpmath_string, "387,sse")
2078 || ! strcmp (ix86_fpmath_string, "sse,387"))
2080 if (!TARGET_SSE)
2082 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2083 ix86_fpmath = FPMATH_387;
2085 else if (!TARGET_80387)
2087 warning (0, "387 instruction set disabled, using SSE arithmetics");
2088 ix86_fpmath = FPMATH_SSE;
2090 else
2091 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2093 else
2094 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2097 /* If the i387 is disabled, then do not return values in it. */
2098 if (!TARGET_80387)
2099 target_flags &= ~MASK_FLOAT_RETURNS;
2101 if ((x86_accumulate_outgoing_args & TUNEMASK)
2102 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2103 && !optimize_size)
2104 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2106 /* ??? Unwind info is not correct around the CFG unless either a frame
2107 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2108 unwind info generation to be aware of the CFG and propagating states
2109 around edges. */
2110 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2111 || flag_exceptions || flag_non_call_exceptions)
2112 && flag_omit_frame_pointer
2113 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2115 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2116 warning (0, "unwind tables currently require either a frame pointer "
2117 "or -maccumulate-outgoing-args for correctness");
2118 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2121 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2123 char *p;
2124 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2125 p = strchr (internal_label_prefix, 'X');
2126 internal_label_prefix_len = p - internal_label_prefix;
2127 *p = '\0';
2130 /* When the scheduling description is not available, disable the scheduler pass
2131 so it won't slow down the compilation and make x87 code slower. */
2132 if (!TARGET_SCHEDULE)
2133 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2135 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2136 set_param_value ("simultaneous-prefetches",
2137 ix86_cost->simultaneous_prefetches);
2138 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2139 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2142 /* Switch to the appropriate section for output of DECL.
2143 DECL is either a `VAR_DECL' node or a constant of some sort.
2144 RELOC indicates whether forming the initial value of DECL requires
2145 link-time relocations. */
2147 static section *
2148 x86_64_elf_select_section (tree decl, int reloc,
2149 unsigned HOST_WIDE_INT align)
2151 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2152 && ix86_in_large_data_p (decl))
2154 const char *sname = NULL;
2155 unsigned int flags = SECTION_WRITE;
2156 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2158 case SECCAT_DATA:
2159 sname = ".ldata";
2160 break;
2161 case SECCAT_DATA_REL:
2162 sname = ".ldata.rel";
2163 break;
2164 case SECCAT_DATA_REL_LOCAL:
2165 sname = ".ldata.rel.local";
2166 break;
2167 case SECCAT_DATA_REL_RO:
2168 sname = ".ldata.rel.ro";
2169 break;
2170 case SECCAT_DATA_REL_RO_LOCAL:
2171 sname = ".ldata.rel.ro.local";
2172 break;
2173 case SECCAT_BSS:
2174 sname = ".lbss";
2175 flags |= SECTION_BSS;
2176 break;
2177 case SECCAT_RODATA:
2178 case SECCAT_RODATA_MERGE_STR:
2179 case SECCAT_RODATA_MERGE_STR_INIT:
2180 case SECCAT_RODATA_MERGE_CONST:
2181 sname = ".lrodata";
2182 flags = 0;
2183 break;
2184 case SECCAT_SRODATA:
2185 case SECCAT_SDATA:
2186 case SECCAT_SBSS:
2187 gcc_unreachable ();
2188 case SECCAT_TEXT:
2189 case SECCAT_TDATA:
2190 case SECCAT_TBSS:
2191 /* We don't split these for the medium model. Place them into
2192 default sections and hope for the best. */
2193 break;
2195 if (sname)
2197 /* We might get called with string constants, but get_named_section
2198 doesn't like them as they are not DECLs. Also, we need to set
2199 flags in that case. */
2200 if (!DECL_P (decl))
2201 return get_section (sname, flags, NULL);
2202 return get_named_section (decl, sname, reloc);
2205 return default_elf_select_section (decl, reloc, align);
2208 /* Build up a unique section name, expressed as a
2209 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2210 RELOC indicates whether the initial value of EXP requires
2211 link-time relocations. */
2213 static void
2214 x86_64_elf_unique_section (tree decl, int reloc)
2216 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2217 && ix86_in_large_data_p (decl))
2219 const char *prefix = NULL;
2220 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2221 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2223 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2225 case SECCAT_DATA:
2226 case SECCAT_DATA_REL:
2227 case SECCAT_DATA_REL_LOCAL:
2228 case SECCAT_DATA_REL_RO:
2229 case SECCAT_DATA_REL_RO_LOCAL:
2230 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2231 break;
2232 case SECCAT_BSS:
2233 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2234 break;
2235 case SECCAT_RODATA:
2236 case SECCAT_RODATA_MERGE_STR:
2237 case SECCAT_RODATA_MERGE_STR_INIT:
2238 case SECCAT_RODATA_MERGE_CONST:
2239 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2240 break;
2241 case SECCAT_SRODATA:
2242 case SECCAT_SDATA:
2243 case SECCAT_SBSS:
2244 gcc_unreachable ();
2245 case SECCAT_TEXT:
2246 case SECCAT_TDATA:
2247 case SECCAT_TBSS:
2248 /* We don't split these for the medium model. Place them into
2249 default sections and hope for the best. */
2250 break;
2252 if (prefix)
2254 const char *name;
2255 size_t nlen, plen;
2256 char *string;
2257 plen = strlen (prefix);
2259 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2260 name = targetm.strip_name_encoding (name);
2261 nlen = strlen (name);
2263 string = alloca (nlen + plen + 1);
2264 memcpy (string, prefix, plen);
2265 memcpy (string + plen, name, nlen + 1);
2267 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2268 return;
2271 default_unique_section (decl, reloc);
2274 #ifdef COMMON_ASM_OP
2275 /* This says how to output assembler code to declare an
2276 uninitialized external linkage data object.
2278 For medium model x86-64 we need to use the .largecomm directive for
2279 large objects. */
2280 void
2281 x86_elf_aligned_common (FILE *file,
2282 const char *name, unsigned HOST_WIDE_INT size,
2283 int align)
2285 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2286 && size > (unsigned int)ix86_section_threshold)
2287 fprintf (file, ".largecomm\t");
2288 else
2289 fprintf (file, "%s", COMMON_ASM_OP);
2290 assemble_name (file, name);
2291 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2292 size, align / BITS_PER_UNIT);
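/* A rough sketch of the output for a hypothetical one-megabyte object
   "big_buf" with 32 byte alignment, in the medium code model and above the
   default -mlarge-data-threshold (the small-object case depends on
   COMMON_ASM_OP instead):
       .largecomm	big_buf,1048576,32
   Note the alignment is emitted in bytes (align / BITS_PER_UNIT).  */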
2295 /* Utility function for targets to use in implementing
2296 ASM_OUTPUT_ALIGNED_BSS. */
2298 void
2299 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2300 const char *name, unsigned HOST_WIDE_INT size,
2301 int align)
2303 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2304 && size > (unsigned int)ix86_section_threshold)
2305 switch_to_section (get_named_section (decl, ".lbss", 0));
2306 else
2307 switch_to_section (bss_section);
2308 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2309 #ifdef ASM_DECLARE_OBJECT_NAME
2310 last_assemble_variable_decl = decl;
2311 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2312 #else
2313 /* The standard thing is just to output a label for the object. */
2314 ASM_OUTPUT_LABEL (file, name);
2315 #endif /* ASM_DECLARE_OBJECT_NAME */
2316 ASM_OUTPUT_SKIP (file, size ? size : 1);
2318 #endif
2320 void
2321 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2323 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2324 make the problem with not enough registers even worse. */
2325 #ifdef INSN_SCHEDULING
2326 if (level > 1)
2327 flag_schedule_insns = 0;
2328 #endif
2330 if (TARGET_MACHO)
2331 /* The Darwin libraries never set errno, so we might as well
2332 avoid calling them when that's the only reason we would. */
2333 flag_errno_math = 0;
2335 /* The default values of these switches depend on TARGET_64BIT,
2336 which is not known at this moment. Mark these values with 2 and
2337 let the user override them. In case there is no command line option
2338 specifying them, we will set the defaults in override_options. */
2339 if (optimize >= 1)
2340 flag_omit_frame_pointer = 2;
2341 flag_pcc_struct_return = 2;
2342 flag_asynchronous_unwind_tables = 2;
2343 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2344 SUBTARGET_OPTIMIZATION_OPTIONS;
2345 #endif
2348 /* Table of valid machine attributes. */
2349 const struct attribute_spec ix86_attribute_table[] =
2351 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2352 /* Stdcall attribute says callee is responsible for popping arguments
2353 if they are not variable. */
2354 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2355 /* Fastcall attribute says callee is responsible for popping arguments
2356 if they are not variable. */
2357 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2358 /* Cdecl attribute says the callee is a normal C declaration */
2359 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2360 /* Regparm attribute specifies how many integer arguments are to be
2361 passed in registers. */
2362 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2363 /* X87regparm attribute says we are passing floating point arguments
2364 in 80387 registers. */
2365 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2366 /* Sseregparm attribute says we are using x86_64 calling conventions
2367 for FP arguments. */
2368 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2369 /* force_align_arg_pointer says this function realigns the stack at entry. */
2370 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2371 false, true, true, ix86_handle_cconv_attribute },
2372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2373 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2374 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2375 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2376 #endif
2377 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2378 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2379 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2380 SUBTARGET_ATTRIBUTE_TABLE,
2381 #endif
2382 { NULL, 0, 0, false, false, false, NULL }
2385 /* Decide whether we can make a sibling call to a function. DECL is the
2386 declaration of the function being targeted by the call and EXP is the
2387 CALL_EXPR representing the call. */
2389 static bool
2390 ix86_function_ok_for_sibcall (tree decl, tree exp)
2392 tree func;
2393 rtx a, b;
2395 /* If we are generating position-independent code, we cannot sibcall
2396 optimize any indirect call, or a direct call to a global function,
2397 as the PLT requires %ebx be live. */
2398 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2399 return false;
2401 if (decl)
2402 func = decl;
2403 else
2405 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2406 if (POINTER_TYPE_P (func))
2407 func = TREE_TYPE (func);
2410 /* Check that the return value locations are the same. For example,
2411 if we are returning floats on the 80387 register stack, we cannot
2412 make a sibcall from a function that doesn't return a float to a
2413 function that does or, conversely, from a function that does return
2414 a float to a function that doesn't; the necessary stack adjustment
2415 would not be executed. This is also the place we notice
2416 differences in the return value ABI. Note that it is ok for one
2417 of the functions to have void return type as long as the return
2418 value of the other is passed in a register. */
2419 a = ix86_function_value (TREE_TYPE (exp), func, false);
2420 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2421 cfun->decl, false);
2422 if (STACK_REG_P (a) || STACK_REG_P (b))
2424 if (!rtx_equal_p (a, b))
2425 return false;
2427 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2429 else if (!rtx_equal_p (a, b))
2430 return false;
2432 /* If this call is indirect, we'll need to be able to use a call-clobbered
2433 register for the address of the target function. Make sure that all
2434 such registers are not used for passing parameters. */
2435 if (!decl && !TARGET_64BIT)
2437 tree type;
2439 /* We're looking at the CALL_EXPR, we need the type of the function. */
2440 type = TREE_OPERAND (exp, 0); /* pointer expression */
2441 type = TREE_TYPE (type); /* pointer type */
2442 type = TREE_TYPE (type); /* function type */
2444 if (ix86_function_regparm (type, NULL) >= 3)
2446 /* ??? Need to count the actual number of registers to be used,
2447 not the possible number of registers. Fix later. */
2448 return false;
2452 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2453 /* Dllimport'd functions are also called indirectly. */
2454 if (decl && DECL_DLLIMPORT_P (decl)
2455 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2456 return false;
2457 #endif
2459 /* If we force-aligned the stack, then sibcalling would unalign the
2460 stack, which may break the called function. */
2461 if (cfun->machine->force_align_arg_pointer)
2462 return false;
2464 /* Otherwise okay. That also includes certain types of indirect calls. */
2465 return true;
2468 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2469 and "sseregparm" calling convention attributes;
2470 arguments as in struct attribute_spec.handler. */
2472 static tree
2473 ix86_handle_cconv_attribute (tree *node, tree name,
2474 tree args,
2475 int flags ATTRIBUTE_UNUSED,
2476 bool *no_add_attrs)
2478 if (TREE_CODE (*node) != FUNCTION_TYPE
2479 && TREE_CODE (*node) != METHOD_TYPE
2480 && TREE_CODE (*node) != FIELD_DECL
2481 && TREE_CODE (*node) != TYPE_DECL)
2483 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2484 IDENTIFIER_POINTER (name));
2485 *no_add_attrs = true;
2486 return NULL_TREE;
2489 /* Can combine regparm with all attributes but fastcall. */
2490 if (is_attribute_p ("regparm", name))
2492 tree cst;
2494 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2496 error ("fastcall and regparm attributes are not compatible");
2499 cst = TREE_VALUE (args);
2500 if (TREE_CODE (cst) != INTEGER_CST)
2502 warning (OPT_Wattributes,
2503 "%qs attribute requires an integer constant argument",
2504 IDENTIFIER_POINTER (name));
2505 *no_add_attrs = true;
2507 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2509 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2510 IDENTIFIER_POINTER (name), REGPARM_MAX);
2511 *no_add_attrs = true;
2514 if (!TARGET_64BIT
2515 && lookup_attribute (ix86_force_align_arg_pointer_string,
2516 TYPE_ATTRIBUTES (*node))
2517 && compare_tree_int (cst, REGPARM_MAX-1))
2519 error ("%s functions limited to %d register parameters",
2520 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2523 return NULL_TREE;
2526 if (TARGET_64BIT)
2528 warning (OPT_Wattributes, "%qs attribute ignored",
2529 IDENTIFIER_POINTER (name));
2530 *no_add_attrs = true;
2531 return NULL_TREE;
2534 /* Can combine fastcall with stdcall (redundant), x87regparm
2535 and sseregparm. */
2536 if (is_attribute_p ("fastcall", name))
2538 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2540 error ("fastcall and cdecl attributes are not compatible");
2542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2544 error ("fastcall and stdcall attributes are not compatible");
2546 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2548 error ("fastcall and regparm attributes are not compatible");
2552 /* Can combine stdcall with fastcall (redundant), regparm,
2553 x87regparm and sseregparm. */
2554 else if (is_attribute_p ("stdcall", name))
2556 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2558 error ("stdcall and cdecl attributes are not compatible");
2560 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2562 error ("stdcall and fastcall attributes are not compatible");
2566 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2567 else if (is_attribute_p ("cdecl", name))
2569 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2571 error ("stdcall and cdecl attributes are not compatible");
2573 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2575 error ("fastcall and cdecl attributes are not compatible");
2579 /* Can combine x87regparm or sseregparm with all attributes. */
2581 return NULL_TREE;
2584 /* Return 0 if the attributes for two types are incompatible, 1 if they
2585 are compatible, and 2 if they are nearly compatible (which causes a
2586 warning to be generated). */
2588 static int
2589 ix86_comp_type_attributes (tree type1, tree type2)
2591 /* Check for mismatch of non-default calling convention. */
2592 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2594 if (TREE_CODE (type1) != FUNCTION_TYPE)
2595 return 1;
2597 /* Check for mismatched fastcall/regparm types. */
2598 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2599 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2600 || (ix86_function_regparm (type1, NULL)
2601 != ix86_function_regparm (type2, NULL)))
2602 return 0;
2604 /* Check for mismatched x87regparm types. */
2605 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1))
2606 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2)))
2607 return 0;
2609 /* Check for mismatched sseregparm types. */
2610 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2611 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2612 return 0;
2614 /* Check for mismatched return types (cdecl vs stdcall). */
2615 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2616 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2617 return 0;
2619 return 1;
2622 /* Return the regparm value for a function with the indicated TYPE and DECL.
2623 DECL may be NULL when calling function indirectly
2624 or considering a libcall. */
2626 static int
2627 ix86_function_regparm (tree type, tree decl)
2629 tree attr;
2630 int regparm = ix86_regparm;
2631 bool user_convention = false;
2633 if (!TARGET_64BIT)
2635 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2636 if (attr)
2638 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2639 user_convention = true;
2642 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2644 regparm = 2;
2645 user_convention = true;
2648 /* Use register calling convention for local functions when possible. */
2649 if (!TARGET_64BIT && !user_convention && decl
2650 && flag_unit_at_a_time && !profile_flag)
2652 struct cgraph_local_info *i = cgraph_local_info (decl);
2653 if (i && i->local)
2655 int local_regparm, globals = 0, regno;
2657 /* Make sure no regparm register is taken by a global register
2658 variable. */
2659 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2660 if (global_regs[local_regparm])
2661 break;
2662 /* We can't use regparm(3) for nested functions as these use the
2663 static chain pointer in the third argument. */
2664 if (local_regparm == 3
2665 && decl_function_context (decl)
2666 && !DECL_NO_STATIC_CHAIN (decl))
2667 local_regparm = 2;
2668 /* If the function realigns its stack pointer, the
2669 prologue will clobber %ecx. If we've already
2670 generated code for the callee, the callee
2671 DECL_STRUCT_FUNCTION is gone, so we fall back to
2672 scanning the attributes for the self-realigning
2673 property. */
2674 if ((DECL_STRUCT_FUNCTION (decl)
2675 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2676 || (!DECL_STRUCT_FUNCTION (decl)
2677 && lookup_attribute (ix86_force_align_arg_pointer_string,
2678 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2679 local_regparm = 2;
2680 /* Each global register variable increases register pressure, so the
2681 more global register variables there are, the less useful the regparm
2682 optimization is, unless the user requested it explicitly. */
2683 for (regno = 0; regno < 6; regno++)
2684 if (global_regs[regno])
2685 globals++;
2686 local_regparm
2687 = globals < local_regparm ? local_regparm - globals : 0;
2689 if (local_regparm > regparm)
2690 regparm = local_regparm;
2694 return regparm;
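/* Example of the convention computed above, for a hypothetical declaration:
     int __attribute__((regparm(3))) f (int a, int b, int c);
   passes a, b and c in %eax, %edx and %ecx instead of on the stack, while
   fastcall behaves like regparm(2) using %ecx and %edx.  */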
2697 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2698 arguments in x87 registers for a function with the indicated
2699 TYPE and DECL. DECL may be NULL when calling function indirectly
2700 or considering a libcall. For local functions, return 2.
2701 Otherwise return 0. */
2703 static int
2704 ix86_function_x87regparm (tree type, tree decl)
2706 /* Use x87 registers to pass floating point arguments if requested
2707 by the x87regparm attribute. */
2708 if (TARGET_X87REGPARM
2709 || (type
2710 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type))))
2712 if (!TARGET_80387)
2714 if (decl)
2715 error ("Calling %qD with attribute x87regparm without "
2716 "80387 enabled", decl);
2717 else
2718 error ("Calling %qT with attribute x87regparm without "
2719 "80387 enabled", type);
2720 return 0;
2723 return 1;
2726 /* For local functions, pass up to X87_REGPARM_MAX floating point
2727 arguments in x87 registers. */
2728 if (!TARGET_64BIT && decl
2729 && flag_unit_at_a_time && !profile_flag)
2731 struct cgraph_local_info *i = cgraph_local_info (decl);
2732 if (i && i->local)
2733 return 2;
2736 return 0;
2739 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2740 DFmode (2) arguments in SSE registers for a function with the
2741 indicated TYPE and DECL. DECL may be NULL when calling function
2742 indirectly or considering a libcall. Otherwise return 0. */
2744 static int
2745 ix86_function_sseregparm (tree type, tree decl)
2747 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2748 by the sseregparm attribute. */
2749 if (TARGET_SSEREGPARM
2750 || (type
2751 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2753 if (!TARGET_SSE)
2755 if (decl)
2756 error ("Calling %qD with attribute sseregparm without "
2757 "SSE/SSE2 enabled", decl);
2758 else
2759 error ("Calling %qT with attribute sseregparm without "
2760 "SSE/SSE2 enabled", type);
2761 return 0;
2764 return 2;
2767 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2768 (and DFmode for SSE2) arguments in SSE registers,
2769 even for 32-bit targets. */
2770 if (!TARGET_64BIT && decl
2771 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2773 struct cgraph_local_info *i = cgraph_local_info (decl);
2774 if (i && i->local)
2775 return TARGET_SSE2 ? 2 : 1;
2778 return 0;
2781 /* Return true if EAX is live at the start of the function. Used by
2782 ix86_expand_prologue to determine if we need special help before
2783 calling allocate_stack_worker. */
2785 static bool
2786 ix86_eax_live_at_start_p (void)
2788 /* Cheat. Don't bother working forward from ix86_function_regparm
2789 to the function type to whether an actual argument is located in
2790 eax. Instead just look at cfg info, which is still close enough
2791 to correct at this point. This gives false positives for broken
2792 functions that might use uninitialized data that happens to be
2793 allocated in eax, but who cares? */
2794 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2797 /* Value is the number of bytes of arguments automatically
2798 popped when returning from a subroutine call.
2799 FUNDECL is the declaration node of the function (as a tree),
2800 FUNTYPE is the data type of the function (as a tree),
2801 or for a library call it is an identifier node for the subroutine name.
2802 SIZE is the number of bytes of arguments passed on the stack.
2804 On the 80386, the RTD insn may be used to pop them if the number
2805 of args is fixed, but if the number is variable then the caller
2806 must pop them all. RTD can't be used for library calls now
2807 because the library is compiled with the Unix compiler.
2808 Use of RTD is a selectable option, since it is incompatible with
2809 standard Unix calling sequences. If the option is not selected,
2810 the caller must always pop the args.
2812 The attribute stdcall is equivalent to RTD on a per module basis. */
2815 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2817 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2819 /* Cdecl functions override -mrtd, and never pop the stack. */
2820 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2822 /* Stdcall and fastcall functions will pop the stack if not
2823 variable args. */
2824 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2825 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2826 rtd = 1;
2828 if (rtd
2829 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2830 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2831 == void_type_node)))
2832 return size;
2835 /* Lose any fake structure return argument if it is passed on the stack. */
2836 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2837 && !TARGET_64BIT
2838 && !KEEP_AGGREGATE_RETURN_POINTER)
2840 int nregs = ix86_function_regparm (funtype, fundecl);
2842 if (!nregs)
2843 return GET_MODE_SIZE (Pmode);
2846 return 0;
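/* Example: for a hypothetical "void __attribute__((stdcall)) f (int, int);"
   the function above returns 8, so the callee pops its two stacked arguments
   ("ret $8"); a cdecl or variadic function returns 0 and the caller cleans
   up.  */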
2849 /* Argument support functions. */
2851 /* Return true when register may be used to pass function parameters. */
2852 bool
2853 ix86_function_arg_regno_p (int regno)
2855 int i;
2856 if (!TARGET_64BIT)
2857 return (regno < REGPARM_MAX
2858 || (TARGET_80387 && FP_REGNO_P (regno)
2859 && (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
2860 || (TARGET_MMX && MMX_REGNO_P (regno)
2861 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2862 || (TARGET_SSE && SSE_REGNO_P (regno)
2863 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2865 if (TARGET_SSE && SSE_REGNO_P (regno)
2866 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2867 return true;
2868 /* RAX is used as a hidden argument to varargs functions. */
2869 if (!regno)
2870 return true;
2871 for (i = 0; i < REGPARM_MAX; i++)
2872 if (regno == x86_64_int_parameter_registers[i])
2873 return true;
2874 return false;
2877 /* Return true if we do not know how to pass TYPE solely in registers. */
2879 static bool
2880 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2882 if (must_pass_in_stack_var_size_or_pad (mode, type))
2883 return true;
2885 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2886 The layout_type routine is crafty and tries to trick us into passing
2887 currently unsupported vector types on the stack by using TImode. */
2888 return (!TARGET_64BIT && mode == TImode
2889 && type && TREE_CODE (type) != VECTOR_TYPE);
2892 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2893 for a call to a function whose data type is FNTYPE.
2894 For a library call, FNTYPE is 0. */
2896 void
2897 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2898 tree fntype, /* tree ptr for function decl */
2899 rtx libname, /* SYMBOL_REF of library name or 0 */
2900 tree fndecl)
2902 static CUMULATIVE_ARGS zero_cum;
2903 tree param, next_param;
2905 if (TARGET_DEBUG_ARG)
2907 fprintf (stderr, "\ninit_cumulative_args (");
2908 if (fntype)
2909 fprintf (stderr, "fntype code = %s, ret code = %s",
2910 tree_code_name[(int) TREE_CODE (fntype)],
2911 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2912 else
2913 fprintf (stderr, "no fntype");
2915 if (libname)
2916 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2919 *cum = zero_cum;
2921 /* Set up the number of registers to use for passing arguments. */
2922 cum->nregs = ix86_regparm;
2923 if (TARGET_80387)
2924 cum->x87_nregs = X87_REGPARM_MAX;
2925 if (TARGET_SSE)
2926 cum->sse_nregs = SSE_REGPARM_MAX;
2927 if (TARGET_MMX)
2928 cum->mmx_nregs = MMX_REGPARM_MAX;
2929 cum->warn_sse = true;
2930 cum->warn_mmx = true;
2931 cum->maybe_vaarg = false;
2933 /* Use ecx and edx registers if function has fastcall attribute,
2934 else look for regparm information. */
2935 if (fntype && !TARGET_64BIT)
2937 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2939 cum->nregs = 2;
2940 cum->fastcall = 1;
2942 else
2943 cum->nregs = ix86_function_regparm (fntype, fndecl);
2946 /* Set up the number of 80387 registers used for passing
2947 floating point arguments. Warn for mismatching ABI. */
2948 cum->float_in_x87 = ix86_function_x87regparm (fntype, fndecl);
2950 /* Set up the number of SSE registers used for passing SFmode
2951 and DFmode arguments. Warn for mismatching ABI. */
2952 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2954 /* Determine if this function has variable arguments. This is
2955 indicated by the last argument being 'void_type_node' if there
2956 are no variable arguments. If there are variable arguments, then
2957 we won't pass anything in registers in 32-bit mode. */
2959 if (cum->nregs || cum->mmx_nregs
2960 || cum->x87_nregs || cum->sse_nregs)
2962 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2963 param != 0; param = next_param)
2965 next_param = TREE_CHAIN (param);
2966 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2968 if (!TARGET_64BIT)
2970 cum->nregs = 0;
2971 cum->x87_nregs = 0;
2972 cum->sse_nregs = 0;
2973 cum->mmx_nregs = 0;
2974 cum->warn_sse = 0;
2975 cum->warn_mmx = 0;
2976 cum->fastcall = 0;
2977 cum->float_in_x87 = 0;
2978 cum->float_in_sse = 0;
2980 cum->maybe_vaarg = true;
2984 if ((!fntype && !libname)
2985 || (fntype && !TYPE_ARG_TYPES (fntype)))
2986 cum->maybe_vaarg = true;
2988 if (TARGET_DEBUG_ARG)
2989 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2991 return;
2994 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2995 But in the case of vector types, it is some vector mode.
2997 When we have only some of our vector isa extensions enabled, then there
2998 are some modes for which vector_mode_supported_p is false. For these
2999 modes, the generic vector support in gcc will choose some non-vector mode
3000 in order to implement the type. By computing the natural mode, we'll
3001 select the proper ABI location for the operand and not depend on whatever
3002 the middle-end decides to do with these vector types. */
3004 static enum machine_mode
3005 type_natural_mode (tree type)
3007 enum machine_mode mode = TYPE_MODE (type);
3009 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3011 HOST_WIDE_INT size = int_size_in_bytes (type);
3012 if ((size == 8 || size == 16)
3013 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3014 && TYPE_VECTOR_SUBPARTS (type) > 1)
3016 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3018 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3019 mode = MIN_MODE_VECTOR_FLOAT;
3020 else
3021 mode = MIN_MODE_VECTOR_INT;
3023 /* Get the mode which has this inner mode and number of units. */
3024 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3025 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3026 && GET_MODE_INNER (mode) == innermode)
3027 return mode;
3029 gcc_unreachable ();
3033 return mode;
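/* For instance, a "int __attribute__((vector_size (16)))" type normally has
   TYPE_MODE V4SImode, but when SSE is disabled the middle-end may fall back
   to a non-vector mode for it; the loop above still recovers V4SImode, so
   the ABI location is chosen from the true vector shape rather than from
   whatever mode the middle-end picked.  */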
3036 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3037 this may not agree with the mode that the type system has chosen for the
3038 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3039 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3041 static rtx
3042 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3043 unsigned int regno)
3045 rtx tmp;
3047 if (orig_mode != BLKmode)
3048 tmp = gen_rtx_REG (orig_mode, regno);
3049 else
3051 tmp = gen_rtx_REG (mode, regno);
3052 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3053 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3056 return tmp;
3059 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3060 of this code is to classify each 8bytes of incoming argument by the register
3061 class and assign registers accordingly. */
3063 /* Return the union class of CLASS1 and CLASS2.
3064 See the x86-64 PS ABI for details. */
3066 static enum x86_64_reg_class
3067 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3069 /* Rule #1: If both classes are equal, this is the resulting class. */
3070 if (class1 == class2)
3071 return class1;
3073 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3074 the other class. */
3075 if (class1 == X86_64_NO_CLASS)
3076 return class2;
3077 if (class2 == X86_64_NO_CLASS)
3078 return class1;
3080 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3081 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3082 return X86_64_MEMORY_CLASS;
3084 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3085 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3086 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3087 return X86_64_INTEGERSI_CLASS;
3088 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3089 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3090 return X86_64_INTEGER_CLASS;
3092 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3093 MEMORY is used. */
3094 if (class1 == X86_64_X87_CLASS
3095 || class1 == X86_64_X87UP_CLASS
3096 || class1 == X86_64_COMPLEX_X87_CLASS
3097 || class2 == X86_64_X87_CLASS
3098 || class2 == X86_64_X87UP_CLASS
3099 || class2 == X86_64_COMPLEX_X87_CLASS)
3100 return X86_64_MEMORY_CLASS;
3102 /* Rule #6: Otherwise class SSE is used. */
3103 return X86_64_SSE_CLASS;
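/* A few worked merges, following the rules above (informal sketch):
   NO_CLASS + SSEDF -> SSEDF (rule #2); INTEGERSI + SSESF -> INTEGERSI
   (the rule #4 special case, e.g. for a hypothetical  union { float f; int i; });
   INTEGER + SSE -> INTEGER (rule #4); X87 + anything -> MEMORY (rule #5),
   which is why an aggregate mixing a long double with other members is
   passed in memory.  */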
3106 /* Classify the argument of type TYPE and mode MODE.
3107 CLASSES will be filled by the register class used to pass each word
3108 of the operand. The number of words is returned. In case the parameter
3109 should be passed in memory, 0 is returned. As a special case for zero
3110 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3112 BIT_OFFSET is used internally for handling records; it specifies the
3113 offset of the sub-object in bits, modulo 256, to avoid overflow cases.
3115 See the x86-64 PS ABI for details. */
3118 static int
3119 classify_argument (enum machine_mode mode, tree type,
3120 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3122 HOST_WIDE_INT bytes =
3123 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3124 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3126 /* Variable sized entities are always passed/returned in memory. */
3127 if (bytes < 0)
3128 return 0;
3130 if (mode != VOIDmode
3131 && targetm.calls.must_pass_in_stack (mode, type))
3132 return 0;
3134 if (type && AGGREGATE_TYPE_P (type))
3136 int i;
3137 tree field;
3138 enum x86_64_reg_class subclasses[MAX_CLASSES];
3140 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3141 if (bytes > 16)
3142 return 0;
3144 for (i = 0; i < words; i++)
3145 classes[i] = X86_64_NO_CLASS;
3147 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3148 signal the memory class, so handle this as a special case. */
3149 if (!words)
3151 classes[0] = X86_64_NO_CLASS;
3152 return 1;
3155 /* Classify each field of record and merge classes. */
3156 switch (TREE_CODE (type))
3158 case RECORD_TYPE:
3159 /* And now merge the fields of structure. */
3160 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3162 if (TREE_CODE (field) == FIELD_DECL)
3164 int num;
3166 if (TREE_TYPE (field) == error_mark_node)
3167 continue;
3169 /* Bitfields are always classified as integer. Handle them
3170 early, since later code would consider them to be
3171 misaligned integers. */
3172 if (DECL_BIT_FIELD (field))
3174 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3175 i < ((int_bit_position (field) + (bit_offset % 64))
3176 + tree_low_cst (DECL_SIZE (field), 0)
3177 + 63) / 8 / 8; i++)
3178 classes[i] =
3179 merge_classes (X86_64_INTEGER_CLASS,
3180 classes[i]);
3182 else
3184 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3185 TREE_TYPE (field), subclasses,
3186 (int_bit_position (field)
3187 + bit_offset) % 256);
3188 if (!num)
3189 return 0;
3190 for (i = 0; i < num; i++)
3192 int pos =
3193 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3194 classes[i + pos] =
3195 merge_classes (subclasses[i], classes[i + pos]);
3200 break;
3202 case ARRAY_TYPE:
3203 /* Arrays are handled as small records. */
3205 int num;
3206 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3207 TREE_TYPE (type), subclasses, bit_offset);
3208 if (!num)
3209 return 0;
3211 /* The partial classes are now full classes. */
3212 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3213 subclasses[0] = X86_64_SSE_CLASS;
3214 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3215 subclasses[0] = X86_64_INTEGER_CLASS;
3217 for (i = 0; i < words; i++)
3218 classes[i] = subclasses[i % num];
3220 break;
3222 case UNION_TYPE:
3223 case QUAL_UNION_TYPE:
3224 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3226 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3228 if (TREE_CODE (field) == FIELD_DECL)
3230 int num;
3232 if (TREE_TYPE (field) == error_mark_node)
3233 continue;
3235 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3236 TREE_TYPE (field), subclasses,
3237 bit_offset);
3238 if (!num)
3239 return 0;
3240 for (i = 0; i < num; i++)
3241 classes[i] = merge_classes (subclasses[i], classes[i]);
3244 break;
3246 default:
3247 gcc_unreachable ();
3250 /* Final merger cleanup. */
3251 for (i = 0; i < words; i++)
3253 /* If one class is MEMORY, everything should be passed in
3254 memory. */
3255 if (classes[i] == X86_64_MEMORY_CLASS)
3256 return 0;
3258 /* The X86_64_SSEUP_CLASS should be always preceded by
3259 X86_64_SSE_CLASS. */
3260 if (classes[i] == X86_64_SSEUP_CLASS
3261 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3262 classes[i] = X86_64_SSE_CLASS;
3264 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3265 if (classes[i] == X86_64_X87UP_CLASS
3266 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3267 classes[i] = X86_64_SSE_CLASS;
3269 return words;
3272 /* Compute alignment needed. We align all types to natural boundaries with
3273 the exception of XFmode, which is aligned to 64 bits. */
3274 if (mode != VOIDmode && mode != BLKmode)
3276 int mode_alignment = GET_MODE_BITSIZE (mode);
3278 if (mode == XFmode)
3279 mode_alignment = 128;
3280 else if (mode == XCmode)
3281 mode_alignment = 256;
3282 if (COMPLEX_MODE_P (mode))
3283 mode_alignment /= 2;
3284 /* Misaligned fields are always returned in memory. */
3285 if (bit_offset % mode_alignment)
3286 return 0;
3289 /* for V1xx modes, just use the base mode */
3290 if (VECTOR_MODE_P (mode)
3291 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3292 mode = GET_MODE_INNER (mode);
3294 /* Classification of atomic types. */
3295 switch (mode)
3297 case SDmode:
3298 case DDmode:
3299 classes[0] = X86_64_SSE_CLASS;
3300 return 1;
3301 case TDmode:
3302 classes[0] = X86_64_SSE_CLASS;
3303 classes[1] = X86_64_SSEUP_CLASS;
3304 return 2;
3305 case DImode:
3306 case SImode:
3307 case HImode:
3308 case QImode:
3309 case CSImode:
3310 case CHImode:
3311 case CQImode:
3312 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3313 classes[0] = X86_64_INTEGERSI_CLASS;
3314 else
3315 classes[0] = X86_64_INTEGER_CLASS;
3316 return 1;
3317 case CDImode:
3318 case TImode:
3319 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3320 return 2;
3321 case CTImode:
3322 return 0;
3323 case SFmode:
3324 if (!(bit_offset % 64))
3325 classes[0] = X86_64_SSESF_CLASS;
3326 else
3327 classes[0] = X86_64_SSE_CLASS;
3328 return 1;
3329 case DFmode:
3330 classes[0] = X86_64_SSEDF_CLASS;
3331 return 1;
3332 case XFmode:
3333 classes[0] = X86_64_X87_CLASS;
3334 classes[1] = X86_64_X87UP_CLASS;
3335 return 2;
3336 case TFmode:
3337 classes[0] = X86_64_SSE_CLASS;
3338 classes[1] = X86_64_SSEUP_CLASS;
3339 return 2;
3340 case SCmode:
3341 classes[0] = X86_64_SSE_CLASS;
3342 return 1;
3343 case DCmode:
3344 classes[0] = X86_64_SSEDF_CLASS;
3345 classes[1] = X86_64_SSEDF_CLASS;
3346 return 2;
3347 case XCmode:
3348 classes[0] = X86_64_COMPLEX_X87_CLASS;
3349 return 1;
3350 case TCmode:
3351 /* This mode is larger than 16 bytes. */
3352 return 0;
3353 case V4SFmode:
3354 case V4SImode:
3355 case V16QImode:
3356 case V8HImode:
3357 case V2DFmode:
3358 case V2DImode:
3359 classes[0] = X86_64_SSE_CLASS;
3360 classes[1] = X86_64_SSEUP_CLASS;
3361 return 2;
3362 case V2SFmode:
3363 case V2SImode:
3364 case V4HImode:
3365 case V8QImode:
3366 classes[0] = X86_64_SSE_CLASS;
3367 return 1;
3368 case BLKmode:
3369 case VOIDmode:
3370 return 0;
3371 default:
3372 gcc_assert (VECTOR_MODE_P (mode));
3374 if (bytes > 16)
3375 return 0;
3377 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3379 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3380 classes[0] = X86_64_INTEGERSI_CLASS;
3381 else
3382 classes[0] = X86_64_INTEGER_CLASS;
3383 classes[1] = X86_64_INTEGER_CLASS;
3384 return 1 + (bytes > 8);
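/* Worked example (informal): for a hypothetical 16 byte structure

       struct s { double d; long l; };

   the double eightbyte classifies as X86_64_SSEDF_CLASS and the long
   eightbyte (bit_offset 64) as X86_64_INTEGER_CLASS, so the function returns
   2 with classes = { SSEDF, INTEGER }.  A structure containing a long double
   gets { X87, X87UP } and is therefore forced to memory for argument passing
   by examine_argument below.  */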
3388 /* Examine the argument and return the number of registers required in each
3389 class. Return 0 iff the parameter should be passed in memory. */
3390 static int
3391 examine_argument (enum machine_mode mode, tree type, int in_return,
3392 int *int_nregs, int *sse_nregs)
3394 enum x86_64_reg_class class[MAX_CLASSES];
3395 int n = classify_argument (mode, type, class, 0);
3397 *int_nregs = 0;
3398 *sse_nregs = 0;
3399 if (!n)
3400 return 0;
3401 for (n--; n >= 0; n--)
3402 switch (class[n])
3404 case X86_64_INTEGER_CLASS:
3405 case X86_64_INTEGERSI_CLASS:
3406 (*int_nregs)++;
3407 break;
3408 case X86_64_SSE_CLASS:
3409 case X86_64_SSESF_CLASS:
3410 case X86_64_SSEDF_CLASS:
3411 (*sse_nregs)++;
3412 break;
3413 case X86_64_NO_CLASS:
3414 case X86_64_SSEUP_CLASS:
3415 break;
3416 case X86_64_X87_CLASS:
3417 case X86_64_X87UP_CLASS:
3418 if (!in_return)
3419 return 0;
3420 break;
3421 case X86_64_COMPLEX_X87_CLASS:
3422 return in_return ? 2 : 0;
3423 case X86_64_MEMORY_CLASS:
3424 gcc_unreachable ();
3426 return 1;
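/* Informal note, derived from the classification code above: for the
   struct { double d; long l; } example this yields *int_nregs = 1 and
   *sse_nregs = 1; for XFmode (long double) it returns 0 when IN_RETURN is
   zero, i.e. the value goes to memory as an argument but may still be
   returned on the x87 stack.  */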
3429 /* Construct container for the argument used by GCC interface. See
3430 FUNCTION_ARG for the detailed description. */
3432 static rtx
3433 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3434 tree type, int in_return, int nintregs, int nsseregs,
3435 const int *intreg, int sse_regno)
3437 /* The following variables hold the static issued_error state. */
3438 static bool issued_sse_arg_error;
3439 static bool issued_sse_ret_error;
3440 static bool issued_x87_ret_error;
3442 enum machine_mode tmpmode;
3443 int bytes =
3444 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3445 enum x86_64_reg_class class[MAX_CLASSES];
3446 int n;
3447 int i;
3448 int nexps = 0;
3449 int needed_sseregs, needed_intregs;
3450 rtx exp[MAX_CLASSES];
3451 rtx ret;
3453 n = classify_argument (mode, type, class, 0);
3454 if (TARGET_DEBUG_ARG)
3456 if (!n)
3457 fprintf (stderr, "Memory class\n");
3458 else
3460 fprintf (stderr, "Classes:");
3461 for (i = 0; i < n; i++)
3463 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3465 fprintf (stderr, "\n");
3468 if (!n)
3469 return NULL;
3470 if (!examine_argument (mode, type, in_return, &needed_intregs,
3471 &needed_sseregs))
3472 return NULL;
3473 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3474 return NULL;
3476 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3477 some less clueful developer tries to use floating-point anyway. */
3478 if (needed_sseregs && !TARGET_SSE)
3480 if (in_return)
3482 if (!issued_sse_ret_error)
3484 error ("SSE register return with SSE disabled");
3485 issued_sse_ret_error = true;
3488 else if (!issued_sse_arg_error)
3490 error ("SSE register argument with SSE disabled");
3491 issued_sse_arg_error = true;
3493 return NULL;
3496 /* Likewise, error if the ABI requires us to return values in the
3497 x87 registers and the user specified -mno-80387. */
3498 if (!TARGET_80387 && in_return)
3499 for (i = 0; i < n; i++)
3500 if (class[i] == X86_64_X87_CLASS
3501 || class[i] == X86_64_X87UP_CLASS
3502 || class[i] == X86_64_COMPLEX_X87_CLASS)
3504 if (!issued_x87_ret_error)
3506 error ("x87 register return with x87 disabled");
3507 issued_x87_ret_error = true;
3509 return NULL;
3512 /* First construct simple cases. Avoid SCmode, since we want to use
3513 a single register to pass this type. */
3514 if (n == 1 && mode != SCmode)
3515 switch (class[0])
3517 case X86_64_INTEGER_CLASS:
3518 case X86_64_INTEGERSI_CLASS:
3519 return gen_rtx_REG (mode, intreg[0]);
3520 case X86_64_SSE_CLASS:
3521 case X86_64_SSESF_CLASS:
3522 case X86_64_SSEDF_CLASS:
3523 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3524 case X86_64_X87_CLASS:
3525 case X86_64_COMPLEX_X87_CLASS:
3526 return gen_rtx_REG (mode, FIRST_STACK_REG);
3527 case X86_64_NO_CLASS:
3528 /* Zero sized array, struct or class. */
3529 return NULL;
3530 default:
3531 gcc_unreachable ();
3533 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3534 && mode != BLKmode)
3535 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3536 if (n == 2
3537 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3538 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3539 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3540 && class[1] == X86_64_INTEGER_CLASS
3541 && (mode == CDImode || mode == TImode || mode == TFmode)
3542 && intreg[0] + 1 == intreg[1])
3543 return gen_rtx_REG (mode, intreg[0]);
3545 /* Otherwise figure out the entries of the PARALLEL. */
3546 for (i = 0; i < n; i++)
3548 switch (class[i])
3550 case X86_64_NO_CLASS:
3551 break;
3552 case X86_64_INTEGER_CLASS:
3553 case X86_64_INTEGERSI_CLASS:
3554 /* Merge TImodes on aligned occasions here too. */
3555 if (i * 8 + 8 > bytes)
3556 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3557 else if (class[i] == X86_64_INTEGERSI_CLASS)
3558 tmpmode = SImode;
3559 else
3560 tmpmode = DImode;
3561 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3562 if (tmpmode == BLKmode)
3563 tmpmode = DImode;
3564 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3565 gen_rtx_REG (tmpmode, *intreg),
3566 GEN_INT (i*8));
3567 intreg++;
3568 break;
3569 case X86_64_SSESF_CLASS:
3570 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3571 gen_rtx_REG (SFmode,
3572 SSE_REGNO (sse_regno)),
3573 GEN_INT (i*8));
3574 sse_regno++;
3575 break;
3576 case X86_64_SSEDF_CLASS:
3577 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3578 gen_rtx_REG (DFmode,
3579 SSE_REGNO (sse_regno)),
3580 GEN_INT (i*8));
3581 sse_regno++;
3582 break;
3583 case X86_64_SSE_CLASS:
3584 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3585 tmpmode = TImode;
3586 else
3587 tmpmode = DImode;
3588 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3589 gen_rtx_REG (tmpmode,
3590 SSE_REGNO (sse_regno)),
3591 GEN_INT (i*8));
3592 if (tmpmode == TImode)
3593 i++;
3594 sse_regno++;
3595 break;
3596 default:
3597 gcc_unreachable ();
3601 /* Empty aligned struct, union or class. */
3602 if (nexps == 0)
3603 return NULL;
3605 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3606 for (i = 0; i < nexps; i++)
3607 XVECEXP (ret, 0, i) = exp [i];
3608 return ret;
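/* As a rough illustration, for the struct { double d; long l; } example the
   container built here is a two element PARALLEL, approximately

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte travels in %xmm0 and the second in %rdi
   (assuming the first integer and SSE argument slots are still free).  */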
3611 /* Update the data in CUM to advance over an argument
3612 of mode MODE and data type TYPE.
3613 (TYPE is null for libcalls where that information may not be available.) */
3615 void
3616 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3617 tree type, int named)
3619 int bytes =
3620 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3621 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3623 if (type)
3624 mode = type_natural_mode (type);
3626 if (TARGET_DEBUG_ARG)
3627 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3628 "mode=%s, named=%d)\n\n",
3629 words, cum->words, cum->nregs, cum->sse_nregs,
3630 GET_MODE_NAME (mode), named);
3632 if (TARGET_64BIT)
3634 int int_nregs, sse_nregs;
3635 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3636 cum->words += words;
3637 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3639 cum->nregs -= int_nregs;
3640 cum->sse_nregs -= sse_nregs;
3641 cum->regno += int_nregs;
3642 cum->sse_regno += sse_nregs;
3644 else
3645 cum->words += words;
3647 else
3649 switch (mode)
3651 default:
3652 break;
3654 case BLKmode:
3655 if (bytes < 0)
3656 break;
3657 /* FALLTHRU */
3659 case DImode:
3660 case SImode:
3661 case HImode:
3662 case QImode:
3663 cum->words += words;
3664 cum->nregs -= words;
3665 cum->regno += words;
3667 if (cum->nregs <= 0)
3669 cum->nregs = 0;
3670 cum->regno = 0;
3672 break;
3674 case SFmode:
3675 if (cum->float_in_sse > 0)
3676 goto skip_80387;
3678 case DFmode:
3679 if (cum->float_in_sse > 1)
3680 goto skip_80387;
3682 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3683 rounding takes place when values are passed in x87
3684 registers, pass DFmode and SFmode types to local functions
3685 only when flag_unsafe_math_optimizations is set. */
3686 if (!cum->float_in_x87
3687 || (cum->float_in_x87 == 2
3688 && !flag_unsafe_math_optimizations))
3689 break;
3691 case XFmode:
3692 if (!cum->float_in_x87)
3693 break;
3695 if (!type || !AGGREGATE_TYPE_P (type))
3697 cum->x87_nregs -= 1;
3698 cum->x87_regno += 1;
3699 if (cum->x87_nregs <= 0)
3701 cum->x87_nregs = 0;
3702 cum->x87_regno = 0;
3705 break;
3707 skip_80387:
3709 case TImode:
3710 case V16QImode:
3711 case V8HImode:
3712 case V4SImode:
3713 case V2DImode:
3714 case V4SFmode:
3715 case V2DFmode:
3716 if (!type || !AGGREGATE_TYPE_P (type))
3718 cum->sse_nregs -= 1;
3719 cum->sse_regno += 1;
3720 if (cum->sse_nregs <= 0)
3722 cum->sse_nregs = 0;
3723 cum->sse_regno = 0;
3726 break;
3728 case V8QImode:
3729 case V4HImode:
3730 case V2SImode:
3731 case V2SFmode:
3732 if (!type || !AGGREGATE_TYPE_P (type))
3734 cum->mmx_nregs -= 1;
3735 cum->mmx_regno += 1;
3736 if (cum->mmx_nregs <= 0)
3738 cum->mmx_nregs = 0;
3739 cum->mmx_regno = 0;
3742 break;
3747 /* Define where to put the arguments to a function.
3748 Value is zero to push the argument on the stack,
3749 or a hard register in which to store the argument.
3751 MODE is the argument's machine mode.
3752 TYPE is the data type of the argument (as a tree).
3753 This is null for libcalls where that information may
3754 not be available.
3755 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3756 the preceding args and about the function being called.
3757 NAMED is nonzero if this argument is a named parameter
3758 (otherwise it is an extra parameter matching an ellipsis). */
3761 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3762 tree type, int named)
3764 enum machine_mode mode = orig_mode;
3765 rtx ret = NULL_RTX;
3766 int bytes =
3767 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3768 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3769 static bool warnedsse, warnedmmx;
3771 /* To simplify the code below, represent vector types with a vector mode
3772 even if MMX/SSE are not active. */
3773 if (type && TREE_CODE (type) == VECTOR_TYPE)
3774 mode = type_natural_mode (type);
3776 /* Handle a hidden AL argument containing the number of SSE registers used
3777 by varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
3778 avoid any AL settings. */
3779 if (mode == VOIDmode)
3781 if (TARGET_64BIT)
3782 return GEN_INT (cum->maybe_vaarg
3783 ? (cum->sse_nregs < 0
3784 ? SSE_REGPARM_MAX
3785 : cum->sse_regno)
3786 : -1);
3787 else
3788 return constm1_rtx;
3790 if (TARGET_64BIT)
3791 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3792 cum->sse_nregs,
3793 &x86_64_int_parameter_registers [cum->regno],
3794 cum->sse_regno);
3795 else
3796 switch (mode)
3798 default:
3799 break;
3801 case BLKmode:
3802 if (bytes < 0)
3803 break;
3804 /* FALLTHRU */
3805 case DImode:
3806 case SImode:
3807 case HImode:
3808 case QImode:
3809 if (words <= cum->nregs)
3811 int regno = cum->regno;
3813 /* Fastcall allocates the first two DWORD (SImode) or
3814 smaller arguments to ECX and EDX. */
3815 if (cum->fastcall)
3817 if (mode == BLKmode || mode == DImode)
3818 break;
3820 /* ECX not EAX is the first allocated register. */
3821 if (regno == 0)
3822 regno = 2;
3824 ret = gen_rtx_REG (mode, regno);
3826 break;
3828 case SFmode:
3829 if (cum->float_in_sse > 0)
3830 goto skip_80387;
3832 case DFmode:
3833 if (cum->float_in_sse > 1)
3834 goto skip_80387;
3836 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3837 rounding takes place when values are passed in x87
3838 registers, pass DFmode and SFmode types to local functions
3839 only when flag_unsafe_math_optimizations is set. */
3840 if (!cum->float_in_x87
3841 || (cum->float_in_x87 == 2
3842 && !flag_unsafe_math_optimizations))
3843 break;
3845 case XFmode:
3846 if (!cum->float_in_x87)
3847 break;
3849 if (!type || !AGGREGATE_TYPE_P (type))
3850 if (cum->x87_nregs)
3851 ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
3852 break;
3854 skip_80387:
3856 case TImode:
3857 case V16QImode:
3858 case V8HImode:
3859 case V4SImode:
3860 case V2DImode:
3861 case V4SFmode:
3862 case V2DFmode:
3863 if (!type || !AGGREGATE_TYPE_P (type))
3865 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3867 warnedsse = true;
3868 warning (0, "SSE vector argument without SSE enabled "
3869 "changes the ABI");
3871 if (cum->sse_nregs)
3872 ret = gen_reg_or_parallel (mode, orig_mode,
3873 cum->sse_regno + FIRST_SSE_REG);
3875 break;
3876 case V8QImode:
3877 case V4HImode:
3878 case V2SImode:
3879 case V2SFmode:
3880 if (!type || !AGGREGATE_TYPE_P (type))
3882 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3884 warnedmmx = true;
3885 warning (0, "MMX vector argument without MMX enabled "
3886 "changes the ABI");
3888 if (cum->mmx_nregs)
3889 ret = gen_reg_or_parallel (mode, orig_mode,
3890 cum->mmx_regno + FIRST_MMX_REG);
3892 break;
3895 if (TARGET_DEBUG_ARG)
3897 fprintf (stderr,
3898 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3899 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3901 if (ret)
3902 print_simple_rtl (stderr, ret);
3903 else
3904 fprintf (stderr, ", stack");
3906 fprintf (stderr, " )\n");
3909 return ret;
3912 /* A C expression that indicates when an argument must be passed by
3913 reference. If nonzero for an argument, a copy of that argument is
3914 made in memory and a pointer to the argument is passed instead of
3915 the argument itself. The pointer is passed in whatever way is
3916 appropriate for passing a pointer to that type. */
3918 static bool
3919 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3920 enum machine_mode mode ATTRIBUTE_UNUSED,
3921 tree type, bool named ATTRIBUTE_UNUSED)
3923 if (!TARGET_64BIT)
3924 return 0;
3926 if (type && int_size_in_bytes (type) == -1)
3928 if (TARGET_DEBUG_ARG)
3929 fprintf (stderr, "function_arg_pass_by_reference\n");
3930 return 1;
3933 return 0;
3936 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3937 ABI. Only called if TARGET_SSE. */
3938 static bool
3939 contains_128bit_aligned_vector_p (tree type)
3941 enum machine_mode mode = TYPE_MODE (type);
3942 if (SSE_REG_MODE_P (mode)
3943 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3944 return true;
3945 if (TYPE_ALIGN (type) < 128)
3946 return false;
3948 if (AGGREGATE_TYPE_P (type))
3950 /* Walk the aggregates recursively. */
3951 switch (TREE_CODE (type))
3953 case RECORD_TYPE:
3954 case UNION_TYPE:
3955 case QUAL_UNION_TYPE:
3957 tree field;
3959 /* Walk all the structure fields. */
3960 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3962 if (TREE_CODE (field) == FIELD_DECL
3963 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3964 return true;
3966 break;
3969 case ARRAY_TYPE:
3970 /* Just for use if some languages pass arrays by value. */
3971 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3972 return true;
3973 break;
3975 default:
3976 gcc_unreachable ();
3979 return false;
3982 /* Gives the alignment boundary, in bits, of an argument with the
3983 specified mode and type. */
3986 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3988 int align;
3989 if (type)
3990 align = TYPE_ALIGN (type);
3991 else
3992 align = GET_MODE_ALIGNMENT (mode);
3993 if (align < PARM_BOUNDARY)
3994 align = PARM_BOUNDARY;
3995 if (!TARGET_64BIT)
3997 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3998 make an exception for SSE modes since these require 128bit
3999 alignment.
4001 The handling here differs from field_alignment. ICC aligns MMX
4002 arguments to 4 byte boundaries, while structure fields are aligned
4003 to 8 byte boundaries. */
4004 if (!TARGET_SSE)
4005 align = PARM_BOUNDARY;
4006 else if (!type)
4008 if (!SSE_REG_MODE_P (mode))
4009 align = PARM_BOUNDARY;
4011 else
4013 if (!contains_128bit_aligned_vector_p (type))
4014 align = PARM_BOUNDARY;
4017 if (align > 128)
4018 align = 128;
4019 return align;
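/* Informal examples of the resulting boundaries: on i386 with SSE enabled, a
   16 byte vector argument (e.g. __m128) is aligned to 128 bits while ordinary
   scalars stay at PARM_BOUNDARY (32 bits); with -mno-sse everything falls
   back to PARM_BOUNDARY.  On x86-64 the result is simply the type or mode
   alignment clamped to the range [64, 128].  */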
4022 /* Return true if N is a possible register number of function value. */
4023 bool
4024 ix86_function_value_regno_p (int regno)
4026 if (regno == 0
4027 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4028 || (regno == FIRST_SSE_REG && TARGET_SSE))
4029 return true;
4031 if (!TARGET_64BIT
4032 && (regno == FIRST_MMX_REG && TARGET_MMX))
4033 return true;
4035 return false;
4038 /* Define how to find the value returned by a function.
4039 VALTYPE is the data type of the value (as a tree).
4040 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4041 otherwise, FUNC is 0. */
4043 ix86_function_value (tree valtype, tree fntype_or_decl,
4044 bool outgoing ATTRIBUTE_UNUSED)
4046 enum machine_mode natmode = type_natural_mode (valtype);
4048 if (TARGET_64BIT)
4050 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4051 1, REGPARM_MAX, SSE_REGPARM_MAX,
4052 x86_64_int_return_registers, 0);
4053 /* For zero sized structures, construct_container returns NULL, but we
4054 need to keep the rest of the compiler happy by returning a meaningful value. */
4055 if (!ret)
4056 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4057 return ret;
4059 else
4061 tree fn = NULL_TREE, fntype;
4062 if (fntype_or_decl
4063 && DECL_P (fntype_or_decl))
4064 fn = fntype_or_decl;
4065 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4066 return gen_rtx_REG (TYPE_MODE (valtype),
4067 ix86_value_regno (natmode, fn, fntype));
4071 /* Return true iff type is returned in memory. */
4073 ix86_return_in_memory (tree type)
4075 int needed_intregs, needed_sseregs, size;
4076 enum machine_mode mode = type_natural_mode (type);
4078 if (TARGET_64BIT)
4079 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4081 if (mode == BLKmode)
4082 return 1;
4084 size = int_size_in_bytes (type);
4086 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4087 return 0;
4089 if (VECTOR_MODE_P (mode) || mode == TImode)
4091 /* User-created vectors small enough to fit in EAX. */
4092 if (size < 8)
4093 return 0;
4095 /* MMX/3dNow values are returned in MM0,
4096 except when it doesn't exist. */
4097 if (size == 8)
4098 return (TARGET_MMX ? 0 : 1);
4100 /* SSE values are returned in XMM0, except when it doesn't exist. */
4101 if (size == 16)
4102 return (TARGET_SSE ? 0 : 1);
4105 if (mode == XFmode)
4106 return 0;
4108 if (mode == TDmode)
4109 return 1;
4111 if (size > 12)
4112 return 1;
4113 return 0;
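/* Some concrete 32-bit cases of the above (informal): DImode scalars such as
   long long fit in %eax:%edx and return 0; long double (XFmode) returns 0 and
   uses %st(0); an 8 byte vector returns 0 only when MMX is enabled and a
   16 byte vector only when SSE is enabled; BLKmode aggregates are returned
   in memory.  */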
4116 /* When returning SSE vector types, we have a choice of either
4117 (1) being abi incompatible with a -march switch, or
4118 (2) generating an error.
4119 Given no good solution, I think the safest thing is one warning.
4120 The user won't be able to use -Werror, but....
4122 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4123 called in response to actually generating a caller or callee that
4124 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4125 via aggregate_value_p for general type probing from tree-ssa. */
4127 static rtx
4128 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4130 static bool warnedsse, warnedmmx;
4132 if (type)
4134 /* Look at the return type of the function, not the function type. */
4135 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4137 if (!TARGET_SSE && !warnedsse)
4139 if (mode == TImode
4140 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4142 warnedsse = true;
4143 warning (0, "SSE vector return without SSE enabled "
4144 "changes the ABI");
4148 if (!TARGET_MMX && !warnedmmx)
4150 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4152 warnedmmx = true;
4153 warning (0, "MMX vector return without MMX enabled "
4154 "changes the ABI");
4159 return NULL;
4162 /* Define how to find the value returned by a library function
4163 assuming the value has mode MODE. */
4165 ix86_libcall_value (enum machine_mode mode)
4167 if (TARGET_64BIT)
4169 switch (mode)
4171 case SFmode:
4172 case SCmode:
4173 case DFmode:
4174 case DCmode:
4175 case TFmode:
4176 case SDmode:
4177 case DDmode:
4178 case TDmode:
4179 return gen_rtx_REG (mode, FIRST_SSE_REG);
4180 case XFmode:
4181 case XCmode:
4182 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4183 case TCmode:
4184 return NULL;
4185 default:
4186 return gen_rtx_REG (mode, 0);
4189 else
4190 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4193 /* Given a mode, return the register to use for a return value. */
4195 static int
4196 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4198 gcc_assert (!TARGET_64BIT);
4200 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4201 we normally prevent this case when mmx is not available. However
4202 some ABIs may require the result to be returned like DImode. */
4203 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4204 return TARGET_MMX ? FIRST_MMX_REG : 0;
4206 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4207 we prevent this case when sse is not available. However some ABIs
4208 may require the result to be returned like integer TImode. */
4209 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4210 return TARGET_SSE ? FIRST_SSE_REG : 0;
4212 /* Decimal floating point values can go in %eax, unlike other float modes. */
4213 if (DECIMAL_FLOAT_MODE_P (mode))
4214 return 0;
4216 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4217 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4218 return 0;
4220 /* Floating point return values in %st(0), except for local functions when
4221 SSE math is enabled or for functions with sseregparm attribute. */
4222 if ((func || fntype)
4223 && (mode == SFmode || mode == DFmode))
4225 int sse_level = ix86_function_sseregparm (fntype, func);
4226 if ((sse_level >= 1 && mode == SFmode)
4227 || (sse_level == 2 && mode == DFmode))
4228 return FIRST_SSE_REG;
4231 return FIRST_FLOAT_REG;
4234 /* Create the va_list data type. */
4236 static tree
4237 ix86_build_builtin_va_list (void)
4239 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4241 /* For i386 we use a plain pointer to the argument area. */
4242 if (!TARGET_64BIT)
4243 return build_pointer_type (char_type_node);
4245 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4246 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4248 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4249 unsigned_type_node);
4250 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4251 unsigned_type_node);
4252 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4253 ptr_type_node);
4254 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4255 ptr_type_node);
4257 va_list_gpr_counter_field = f_gpr;
4258 va_list_fpr_counter_field = f_fpr;
4260 DECL_FIELD_CONTEXT (f_gpr) = record;
4261 DECL_FIELD_CONTEXT (f_fpr) = record;
4262 DECL_FIELD_CONTEXT (f_ovf) = record;
4263 DECL_FIELD_CONTEXT (f_sav) = record;
4265 TREE_CHAIN (record) = type_decl;
4266 TYPE_NAME (record) = type_decl;
4267 TYPE_FIELDS (record) = f_gpr;
4268 TREE_CHAIN (f_gpr) = f_fpr;
4269 TREE_CHAIN (f_fpr) = f_ovf;
4270 TREE_CHAIN (f_ovf) = f_sav;
4272 layout_type (record);
4274 /* The correct type is an array type of one element. */
4275 return build_array_type (record, build_index_type (size_zero_node));
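/* The 64-bit record built above corresponds roughly to the following C
   declaration (a sketch; the real type is created through the tree
   machinery above):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];
 */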
4278 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4280 static void
4281 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4282 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4283 int no_rtl)
4285 CUMULATIVE_ARGS next_cum;
4286 rtx save_area = NULL_RTX, mem;
4287 rtx label;
4288 rtx label_ref;
4289 rtx tmp_reg;
4290 rtx nsse_reg;
4291 int set;
4292 tree fntype;
4293 int stdarg_p;
4294 int i;
4296 if (!TARGET_64BIT)
4297 return;
4299 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4300 return;
4302 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4303 ix86_save_varrargs_registers = 1;
4305 cfun->stack_alignment_needed = 128;
4307 fntype = TREE_TYPE (current_function_decl);
4308 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4309 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4310 != void_type_node));
4312 /* For varargs, we do not want to skip the dummy va_dcl argument.
4313 For stdargs, we do want to skip the last named argument. */
4314 next_cum = *cum;
4315 if (stdarg_p)
4316 function_arg_advance (&next_cum, mode, type, 1);
4318 if (!no_rtl)
4319 save_area = frame_pointer_rtx;
4321 set = get_varargs_alias_set ();
4323 for (i = next_cum.regno;
4324 i < ix86_regparm
4325 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4326 i++)
4328 mem = gen_rtx_MEM (Pmode,
4329 plus_constant (save_area, i * UNITS_PER_WORD));
4330 MEM_NOTRAP_P (mem) = 1;
4331 set_mem_alias_set (mem, set);
4332 emit_move_insn (mem, gen_rtx_REG (Pmode,
4333 x86_64_int_parameter_registers[i]));
4336 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4338 /* Now emit code to save SSE registers. The AX parameter contains the number
4339 of SSE parameter registers used to call this function. We use the
4340 sse_prologue_save insn template, which produces a computed jump across
4341 the SSE saves. We need some preparation work to get this working. */
4343 label = gen_label_rtx ();
4344 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4346 /* Compute address to jump to :
4347 label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes) */
4348 tmp_reg = gen_reg_rtx (Pmode);
4349 nsse_reg = gen_reg_rtx (Pmode);
4350 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4351 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4352 gen_rtx_MULT (Pmode, nsse_reg,
4353 GEN_INT (4))));
4354 if (next_cum.sse_regno)
4355 emit_move_insn
4356 (nsse_reg,
4357 gen_rtx_CONST (DImode,
4358 gen_rtx_PLUS (DImode,
4359 label_ref,
4360 GEN_INT (next_cum.sse_regno * 4))));
4361 else
4362 emit_move_insn (nsse_reg, label_ref);
4363 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4365 /* Compute the address of the memory block we save into. We always use a
4366 pointer pointing 127 bytes past the first byte to store - this is needed
4367 to keep the instruction size limited to 4 bytes. */
4368 tmp_reg = gen_reg_rtx (Pmode);
4369 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4370 plus_constant (save_area,
4371 8 * REGPARM_MAX + 127)));
4372 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4373 MEM_NOTRAP_P (mem) = 1;
4374 set_mem_alias_set (mem, set);
4375 set_mem_align (mem, BITS_PER_WORD);
4377 /* And finally do the dirty job! */
4378 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4379 GEN_INT (next_cum.sse_regno), label));
4384 /* Implement va_start. */
4386 void
4387 ix86_va_start (tree valist, rtx nextarg)
4389 HOST_WIDE_INT words, n_gpr, n_fpr;
4390 tree f_gpr, f_fpr, f_ovf, f_sav;
4391 tree gpr, fpr, ovf, sav, t;
4392 tree type;
4394 /* Only 64bit target needs something special. */
4395 if (!TARGET_64BIT)
4397 std_expand_builtin_va_start (valist, nextarg);
4398 return;
4401 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4402 f_fpr = TREE_CHAIN (f_gpr);
4403 f_ovf = TREE_CHAIN (f_fpr);
4404 f_sav = TREE_CHAIN (f_ovf);
4406 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4407 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4408 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4410 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4412 /* Count number of gp and fp argument registers used. */
4413 words = current_function_args_info.words;
4414 n_gpr = current_function_args_info.regno;
4415 n_fpr = current_function_args_info.sse_regno;
4417 if (TARGET_DEBUG_ARG)
4418 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4419 (int) words, (int) n_gpr, (int) n_fpr);
4421 if (cfun->va_list_gpr_size)
4423 type = TREE_TYPE (gpr);
4424 t = build2 (MODIFY_EXPR, type, gpr,
4425 build_int_cst (type, n_gpr * 8));
4426 TREE_SIDE_EFFECTS (t) = 1;
4427 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4430 if (cfun->va_list_fpr_size)
4432 type = TREE_TYPE (fpr);
4433 t = build2 (MODIFY_EXPR, type, fpr,
4434 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4435 TREE_SIDE_EFFECTS (t) = 1;
4436 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4439 /* Find the overflow area. */
4440 type = TREE_TYPE (ovf);
4441 t = make_tree (type, virtual_incoming_args_rtx);
4442 if (words != 0)
4443 t = build2 (PLUS_EXPR, type, t,
4444 build_int_cst (type, words * UNITS_PER_WORD));
4445 t = build2 (MODIFY_EXPR, type, ovf, t);
4446 TREE_SIDE_EFFECTS (t) = 1;
4447 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4449 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4451 /* Find the register save area.
4452 The function prologue saves it right above the stack frame. */
4453 type = TREE_TYPE (sav);
4454 t = make_tree (type, frame_pointer_rtx);
4455 t = build2 (MODIFY_EXPR, type, sav, t);
4456 TREE_SIDE_EFFECTS (t) = 1;
4457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
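/* Informal example of the expansion above: for
   int f (const char *fmt, ...) the single named argument consumes one GP
   register, so va_start initializes gp_offset = 1 * 8 = 8,
   fp_offset = 8 * REGPARM_MAX = 48 (no named SSE arguments),
   overflow_arg_area to the first stack-passed argument, and reg_save_area to
   the block filled in by ix86_setup_incoming_varargs.  */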
4461 /* Implement va_arg. */
4463 tree
4464 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4466 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4467 tree f_gpr, f_fpr, f_ovf, f_sav;
4468 tree gpr, fpr, ovf, sav, t;
4469 int size, rsize;
4470 tree lab_false, lab_over = NULL_TREE;
4471 tree addr, t2;
4472 rtx container;
4473 int indirect_p = 0;
4474 tree ptrtype;
4475 enum machine_mode nat_mode;
4477 /* Only 64bit target needs something special. */
4478 if (!TARGET_64BIT)
4479 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4481 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4482 f_fpr = TREE_CHAIN (f_gpr);
4483 f_ovf = TREE_CHAIN (f_fpr);
4484 f_sav = TREE_CHAIN (f_ovf);
4486 valist = build_va_arg_indirect_ref (valist);
4487 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4488 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4489 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4490 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4492 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4493 if (indirect_p)
4494 type = build_pointer_type (type);
4495 size = int_size_in_bytes (type);
4496 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4498 nat_mode = type_natural_mode (type);
4499 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4500 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4502 /* Pull the value out of the saved registers. */
4504 addr = create_tmp_var (ptr_type_node, "addr");
4505 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4507 if (container)
4509 int needed_intregs, needed_sseregs;
4510 bool need_temp;
4511 tree int_addr, sse_addr;
4513 lab_false = create_artificial_label ();
4514 lab_over = create_artificial_label ();
4516 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4518 need_temp = (!REG_P (container)
4519 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4520 || TYPE_ALIGN (type) > 128));
4522 /* In case we are passing a structure, verify that it is a consecutive block
4523 in the register save area. If not, we need to do moves. */
4524 if (!need_temp && !REG_P (container))
4526 /* Verify that all registers are strictly consecutive */
4527 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4529 int i;
4531 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4533 rtx slot = XVECEXP (container, 0, i);
4534 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4535 || INTVAL (XEXP (slot, 1)) != i * 16)
4536 need_temp = 1;
4539 else
4541 int i;
4543 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4545 rtx slot = XVECEXP (container, 0, i);
4546 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4547 || INTVAL (XEXP (slot, 1)) != i * 8)
4548 need_temp = 1;
4552 if (!need_temp)
4554 int_addr = addr;
4555 sse_addr = addr;
4557 else
4559 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4560 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4561 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4562 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4565 /* First ensure that we fit completely in registers. */
4566 if (needed_intregs)
4568 t = build_int_cst (TREE_TYPE (gpr),
4569 (REGPARM_MAX - needed_intregs + 1) * 8);
4570 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4571 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4572 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4573 gimplify_and_add (t, pre_p);
4575 if (needed_sseregs)
4577 t = build_int_cst (TREE_TYPE (fpr),
4578 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4579 + REGPARM_MAX * 8);
4580 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4581 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4582 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4583 gimplify_and_add (t, pre_p);
4586 /* Compute index to start of area used for integer regs. */
4587 if (needed_intregs)
4589 /* int_addr = gpr + sav; */
4590 t = fold_convert (ptr_type_node, gpr);
4591 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4592 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4593 gimplify_and_add (t, pre_p);
4595 if (needed_sseregs)
4597 /* sse_addr = fpr + sav; */
4598 t = fold_convert (ptr_type_node, fpr);
4599 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4600 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4601 gimplify_and_add (t, pre_p);
4603 if (need_temp)
4605 int i;
4606 tree temp = create_tmp_var (type, "va_arg_tmp");
4608 /* addr = &temp; */
4609 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4610 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4611 gimplify_and_add (t, pre_p);
4613 for (i = 0; i < XVECLEN (container, 0); i++)
4615 rtx slot = XVECEXP (container, 0, i);
4616 rtx reg = XEXP (slot, 0);
4617 enum machine_mode mode = GET_MODE (reg);
4618 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4619 tree addr_type = build_pointer_type (piece_type);
4620 tree src_addr, src;
4621 int src_offset;
4622 tree dest_addr, dest;
4624 if (SSE_REGNO_P (REGNO (reg)))
4626 src_addr = sse_addr;
4627 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4629 else
4631 src_addr = int_addr;
4632 src_offset = REGNO (reg) * 8;
4634 src_addr = fold_convert (addr_type, src_addr);
4635 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4636 size_int (src_offset)));
4637 src = build_va_arg_indirect_ref (src_addr);
4639 dest_addr = fold_convert (addr_type, addr);
4640 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4641 size_int (INTVAL (XEXP (slot, 1)))));
4642 dest = build_va_arg_indirect_ref (dest_addr);
4644 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4645 gimplify_and_add (t, pre_p);
4649 if (needed_intregs)
4651 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4652 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4653 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4654 gimplify_and_add (t, pre_p);
4656 if (needed_sseregs)
4658 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4659 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4660 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4661 gimplify_and_add (t, pre_p);
4664 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4665 gimplify_and_add (t, pre_p);
4667 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4668 append_to_statement_list (t, pre_p);
4671 /* ... otherwise out of the overflow area. */
4673 /* Care for on-stack alignment if needed. */
4674 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4675 || integer_zerop (TYPE_SIZE (type)))
4676 t = ovf;
4677 else
4679 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4680 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4681 build_int_cst (TREE_TYPE (ovf), align - 1));
4682 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4683 build_int_cst (TREE_TYPE (t), -align));
4685 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4687 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4688 gimplify_and_add (t2, pre_p);
4690 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4691 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4692 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4693 gimplify_and_add (t, pre_p);
4695 if (container)
4697 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4698 append_to_statement_list (t, pre_p);
4701 ptrtype = build_pointer_type (type);
4702 addr = fold_convert (ptrtype, addr);
4704 if (indirect_p)
4705 addr = build_va_arg_indirect_ref (addr);
4706 return build_va_arg_indirect_ref (addr);
4709 /* Return nonzero if OPNUM's MEM should be matched
4710 in movabs* patterns. */
4713 ix86_check_movabs (rtx insn, int opnum)
4715 rtx set, mem;
4717 set = PATTERN (insn);
4718 if (GET_CODE (set) == PARALLEL)
4719 set = XVECEXP (set, 0, 0);
4720 gcc_assert (GET_CODE (set) == SET);
4721 mem = XEXP (set, opnum);
4722 while (GET_CODE (mem) == SUBREG)
4723 mem = SUBREG_REG (mem);
4724 gcc_assert (GET_CODE (mem) == MEM);
4725 return (volatile_ok || !MEM_VOLATILE_P (mem));
4728 /* Initialize the table of extra 80387 mathematical constants. */
4730 static void
4731 init_ext_80387_constants (void)
4733 static const char * cst[5] =
4735 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4736 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4737 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4738 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4739 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4741 int i;
4743 for (i = 0; i < 5; i++)
4745 real_from_string (&ext_80387_constants_table[i], cst[i]);
4746 /* Ensure each constant is rounded to XFmode precision. */
4747 real_convert (&ext_80387_constants_table[i],
4748 XFmode, &ext_80387_constants_table[i]);
4751 ext_80387_constants_init = 1;
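/* For reference, the strings above are (to XFmode precision)
   log10(2), ln(2), log2(e), log2(10) and pi, matching the fldlg2, fldln2,
   fldl2e, fldl2t and fldpi instructions named next to them.  */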
4754 /* Return true if the constant is something that can be loaded with
4755 a special instruction. */
4758 standard_80387_constant_p (rtx x)
4760 REAL_VALUE_TYPE r;
4762 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4763 return -1;
4765 if (x == CONST0_RTX (GET_MODE (x)))
4766 return 1;
4767 if (x == CONST1_RTX (GET_MODE (x)))
4768 return 2;
4770 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4772 /* For XFmode constants, try to find a special 80387 instruction when
4773 optimizing for size or on those CPUs that benefit from them. */
4774 if (GET_MODE (x) == XFmode
4775 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4777 int i;
4779 if (! ext_80387_constants_init)
4780 init_ext_80387_constants ();
4782 for (i = 0; i < 5; i++)
4783 if (real_identical (&r, &ext_80387_constants_table[i]))
4784 return i + 3;
4787 /* Load of the constant -0.0 or -1.0 will be split as
4788 fldz;fchs or fld1;fchs sequence. */
4789 if (real_isnegzero (&r))
4790 return 8;
4791 if (real_identical (&r, &dconstm1))
4792 return 9;
4794 return 0;
4797 /* Return the opcode of the special instruction to be used to load
4798 the constant X. */
4800 const char *
4801 standard_80387_constant_opcode (rtx x)
4803 switch (standard_80387_constant_p (x))
4805 case 1:
4806 return "fldz";
4807 case 2:
4808 return "fld1";
4809 case 3:
4810 return "fldlg2";
4811 case 4:
4812 return "fldln2";
4813 case 5:
4814 return "fldl2e";
4815 case 6:
4816 return "fldl2t";
4817 case 7:
4818 return "fldpi";
4819 case 8:
4820 case 9:
4821 return "#";
4822 default:
4823 gcc_unreachable ();
4827 /* Return the CONST_DOUBLE representing the 80387 constant that is
4828 loaded by the specified special instruction. The argument IDX
4829 matches the return value from standard_80387_constant_p. */
4832 standard_80387_constant_rtx (int idx)
4834 int i;
4836 if (! ext_80387_constants_init)
4837 init_ext_80387_constants ();
4839 switch (idx)
4841 case 3:
4842 case 4:
4843 case 5:
4844 case 6:
4845 case 7:
4846 i = idx - 3;
4847 break;
4849 default:
4850 gcc_unreachable ();
4853 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4854 XFmode);
4857 /* Return 1 if mode is a valid mode for sse. */
4858 static int
4859 standard_sse_mode_p (enum machine_mode mode)
4861 switch (mode)
4863 case V16QImode:
4864 case V8HImode:
4865 case V4SImode:
4866 case V2DImode:
4867 case V4SFmode:
4868 case V2DFmode:
4869 return 1;
4871 default:
4872 return 0;
4876 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4879 standard_sse_constant_p (rtx x)
4881 enum machine_mode mode = GET_MODE (x);
4883 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4884 return 1;
4885 if (vector_all_ones_operand (x, mode)
4886 && standard_sse_mode_p (mode))
4887 return TARGET_SSE2 ? 2 : -1;
4889 return 0;
4892 /* Return the opcode of the special instruction to be used to load
4893 the constant X. */
4895 const char *
4896 standard_sse_constant_opcode (rtx insn, rtx x)
4898 switch (standard_sse_constant_p (x))
4900 case 1:
4901 if (get_attr_mode (insn) == MODE_V4SF)
4902 return "xorps\t%0, %0";
4903 else if (get_attr_mode (insn) == MODE_V2DF)
4904 return "xorpd\t%0, %0";
4905 else
4906 return "pxor\t%0, %0";
4907 case 2:
4908 return "pcmpeqd\t%0, %0";
4910 gcc_unreachable ();
4913 /* Returns 1 if OP contains a symbol reference */
4916 symbolic_reference_mentioned_p (rtx op)
4918 const char *fmt;
4919 int i;
4921 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4922 return 1;
4924 fmt = GET_RTX_FORMAT (GET_CODE (op));
4925 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4927 if (fmt[i] == 'E')
4929 int j;
4931 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4932 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4933 return 1;
4936 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4937 return 1;
4940 return 0;
4943 /* Return 1 if it is appropriate to emit `ret' instructions in the
4944 body of a function. Do this only if the epilogue is simple, needing a
4945 couple of insns. Prior to reloading, we can't tell how many registers
4946 must be saved, so return 0 then. Return 0 if there is no frame
4947 marker to de-allocate. */
4950 ix86_can_use_return_insn_p (void)
4952 struct ix86_frame frame;
4954 if (! reload_completed || frame_pointer_needed)
4955 return 0;
4957 /* Don't allow more than 32k of popped arguments, since that's all we can do
4958 with one instruction. */
4959 if (current_function_pops_args
4960 && current_function_args_size >= 32768)
4961 return 0;
4963 ix86_compute_frame_layout (&frame);
4964 return frame.to_allocate == 0 && frame.nregs == 0;
4967 /* Value should be nonzero if functions must have frame pointers.
4968 Zero means the frame pointer need not be set up (and parms may
4969 be accessed via the stack pointer) in functions that seem suitable. */
4972 ix86_frame_pointer_required (void)
4974 /* If we accessed previous frames, then the generated code expects
4975 to be able to access the saved ebp value in our frame. */
4976 if (cfun->machine->accesses_prev_frame)
4977 return 1;
4979 /* Several x86 OSes need a frame pointer for other reasons,
4980 usually pertaining to setjmp. */
4981 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4982 return 1;
4984 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4985 the frame pointer by default. Turn it back on now if we've not
4986 got a leaf function. */
4987 if (TARGET_OMIT_LEAF_FRAME_POINTER
4988 && (!current_function_is_leaf
4989 || ix86_current_function_calls_tls_descriptor))
4990 return 1;
4992 if (current_function_profile)
4993 return 1;
4995 return 0;
4998 /* Record that the current function accesses previous call frames. */
5000 void
5001 ix86_setup_frame_addresses (void)
5003 cfun->machine->accesses_prev_frame = 1;
5006 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5007 # define USE_HIDDEN_LINKONCE 1
5008 #else
5009 # define USE_HIDDEN_LINKONCE 0
5010 #endif
5012 static int pic_labels_used;
5014 /* Fills in the label name that should be used for a pc thunk for
5015 the given register. */
5017 static void
5018 get_pc_thunk_name (char name[32], unsigned int regno)
5020 gcc_assert (!TARGET_64BIT);
5022 if (USE_HIDDEN_LINKONCE)
5023 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5024 else
5025 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5029 /* This function emits, for each PIC register actually used, a -fpic thunk
5030 that loads that register with the return address of the caller and then returns. */
5032 void
5033 ix86_file_end (void)
5035 rtx xops[2];
5036 int regno;
5038 for (regno = 0; regno < 8; ++regno)
5040 char name[32];
5042 if (! ((pic_labels_used >> regno) & 1))
5043 continue;
5045 get_pc_thunk_name (name, regno);
5047 #if TARGET_MACHO
5048 if (TARGET_MACHO)
5050 switch_to_section (darwin_sections[text_coal_section]);
5051 fputs ("\t.weak_definition\t", asm_out_file);
5052 assemble_name (asm_out_file, name);
5053 fputs ("\n\t.private_extern\t", asm_out_file);
5054 assemble_name (asm_out_file, name);
5055 fputs ("\n", asm_out_file);
5056 ASM_OUTPUT_LABEL (asm_out_file, name);
5058 else
5059 #endif
5060 if (USE_HIDDEN_LINKONCE)
5062 tree decl;
5064 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5065 error_mark_node);
5066 TREE_PUBLIC (decl) = 1;
5067 TREE_STATIC (decl) = 1;
5068 DECL_ONE_ONLY (decl) = 1;
5070 (*targetm.asm_out.unique_section) (decl, 0);
5071 switch_to_section (get_named_section (decl, NULL, 0));
5073 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5074 fputs ("\t.hidden\t", asm_out_file);
5075 assemble_name (asm_out_file, name);
5076 fputc ('\n', asm_out_file);
5077 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5079 else
5081 switch_to_section (text_section);
5082 ASM_OUTPUT_LABEL (asm_out_file, name);
5085 xops[0] = gen_rtx_REG (SImode, regno);
5086 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5087 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5088 output_asm_insn ("ret", xops);
5091 if (NEED_INDICATE_EXEC_STACK)
5092 file_end_indicate_exec_stack ();
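/* For example (informal), when the %ebx thunk is needed the loop above emits,
   modulo section and visibility directives, roughly:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
 */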
5095 /* Emit code for the SET_GOT patterns. */
5097 const char *
5098 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5100 rtx xops[3];
5102 xops[0] = dest;
5103 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5105 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5107 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5109 if (!flag_pic)
5110 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5111 else
5112 output_asm_insn ("call\t%a2", xops);
5114 #if TARGET_MACHO
5115 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5116 is what will be referenced by the Mach-O PIC subsystem. */
5117 if (!label)
5118 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5119 #endif
5121 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5122 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5124 if (flag_pic)
5125 output_asm_insn ("pop{l}\t%0", xops);
5127 else
5129 char name[32];
5130 get_pc_thunk_name (name, REGNO (dest));
5131 pic_labels_used |= 1 << REGNO (dest);
5133 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5134 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5135 output_asm_insn ("call\t%X2", xops);
5136 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5137 is what will be referenced by the Mach-O PIC subsystem. */
5138 #if TARGET_MACHO
5139 if (!label)
5140 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5141 else
5142 targetm.asm_out.internal_label (asm_out_file, "L",
5143 CODE_LABEL_NUMBER (label));
5144 #endif
5147 if (TARGET_MACHO)
5148 return "";
5150 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5151 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5152 else
5153 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5155 return "";
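/* Illustrative sketch (not part of the original source, label names are
   hypothetical): for 32-bit PIC with TARGET_DEEP_BRANCH_PREDICTION the code
   above typically emits something like

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   while without deep branch prediction it uses the inline form

       call    .L2
   .L2: popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   with GOT_SYMBOL_NAME standing in for _GLOBAL_OFFSET_TABLE_ above.  */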
5158 /* Generate a "push" pattern for input ARG. */
5160 static rtx
5161 gen_push (rtx arg)
5163 return gen_rtx_SET (VOIDmode,
5164 gen_rtx_MEM (Pmode,
5165 gen_rtx_PRE_DEC (Pmode,
5166 stack_pointer_rtx)),
5167 arg);
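/* Illustrative note (not part of the original source): for a 32-bit target
   the RTL built above for, say, gen_push of %ebp looks roughly like

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   i.e. the stack pointer is pre-decremented and the register stored, which
   is what a single "push" instruction does.  */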
5170 /* Return >= 0 if there is an unused call-clobbered register available
5171 for the entire function. */
5173 static unsigned int
5174 ix86_select_alt_pic_regnum (void)
5176 if (current_function_is_leaf && !current_function_profile
5177 && !ix86_current_function_calls_tls_descriptor)
5179 int i;
5180 for (i = 2; i >= 0; --i)
5181 if (!regs_ever_live[i])
5182 return i;
5185 return INVALID_REGNUM;
5188 /* Return 1 if we need to save REGNO. */
5189 static int
5190 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5192 if (pic_offset_table_rtx
5193 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5194 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5195 || current_function_profile
5196 || current_function_calls_eh_return
5197 || current_function_uses_const_pool))
5199 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5200 return 0;
5201 return 1;
5204 if (current_function_calls_eh_return && maybe_eh_return)
5206 unsigned i;
5207 for (i = 0; ; i++)
5209 unsigned test = EH_RETURN_DATA_REGNO (i);
5210 if (test == INVALID_REGNUM)
5211 break;
5212 if (test == regno)
5213 return 1;
5217 if (cfun->machine->force_align_arg_pointer
5218 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5219 return 1;
5221 return (regs_ever_live[regno]
5222 && !call_used_regs[regno]
5223 && !fixed_regs[regno]
5224 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5227 /* Return number of registers to be saved on the stack. */
5229 static int
5230 ix86_nsaved_regs (void)
5232 int nregs = 0;
5233 int regno;
5235 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5236 if (ix86_save_reg (regno, true))
5237 nregs++;
5238 return nregs;
5241 /* Return the offset between two registers, one to be eliminated, and the other
5242 its replacement, at the start of a routine. */
5244 HOST_WIDE_INT
5245 ix86_initial_elimination_offset (int from, int to)
5247 struct ix86_frame frame;
5248 ix86_compute_frame_layout (&frame);
5250 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5251 return frame.hard_frame_pointer_offset;
5252 else if (from == FRAME_POINTER_REGNUM
5253 && to == HARD_FRAME_POINTER_REGNUM)
5254 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5255 else
5257 gcc_assert (to == STACK_POINTER_REGNUM);
5259 if (from == ARG_POINTER_REGNUM)
5260 return frame.stack_pointer_offset;
5262 gcc_assert (from == FRAME_POINTER_REGNUM);
5263 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5267 /* Fill in the ix86_frame structure describing the frame of the function being compiled. */
5269 static void
5270 ix86_compute_frame_layout (struct ix86_frame *frame)
5272 HOST_WIDE_INT total_size;
5273 unsigned int stack_alignment_needed;
5274 HOST_WIDE_INT offset;
5275 unsigned int preferred_alignment;
5276 HOST_WIDE_INT size = get_frame_size ();
5278 frame->nregs = ix86_nsaved_regs ();
5279 total_size = size;
5281 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5282 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5284 /* During reload iteration the number of registers saved can change.
5285 Recompute the value as needed. Do not recompute when the number of registers
5286 didn't change, as reload makes multiple calls to this function and does not
5287 expect the decision to change within a single iteration. */
5288 if (!optimize_size
5289 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5291 int count = frame->nregs;
5293 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5294 /* The fast prologue uses move instead of push to save registers. This
5295 is significantly longer, but also executes faster as modern hardware
5296 can execute the moves in parallel, but can't do that for push/pop.
5298 Be careful about choosing which prologue to emit: when the function takes
5299 many instructions to execute we may use the slow version, as well as when
5300 the function is known to be outside a hot spot (this is known only with
5301 feedback). Weight the size of the function by the number of registers
5302 to save, as it is cheap to use one or two push instructions but very
5303 slow to use many of them. */
5304 if (count)
5305 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5306 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5307 || (flag_branch_probabilities
5308 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5309 cfun->machine->use_fast_prologue_epilogue = false;
5310 else
5311 cfun->machine->use_fast_prologue_epilogue
5312 = !expensive_function_p (count);
5314 if (TARGET_PROLOGUE_USING_MOVE
5315 && cfun->machine->use_fast_prologue_epilogue)
5316 frame->save_regs_using_mov = true;
5317 else
5318 frame->save_regs_using_mov = false;
5321 /* Skip return address and saved base pointer. */
5322 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5324 frame->hard_frame_pointer_offset = offset;
5326 /* Do some sanity checking of stack_alignment_needed and
5327 preferred_alignment, since the i386 port is the only one using these
5328 features, and they may break easily. */
5330 gcc_assert (!size || stack_alignment_needed);
5331 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5332 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5333 gcc_assert (stack_alignment_needed
5334 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5336 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5337 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5339 /* Register save area */
5340 offset += frame->nregs * UNITS_PER_WORD;
5342 /* Va-arg area */
5343 if (ix86_save_varrargs_registers)
5345 offset += X86_64_VARARGS_SIZE;
5346 frame->va_arg_size = X86_64_VARARGS_SIZE;
5348 else
5349 frame->va_arg_size = 0;
5351 /* Align start of frame for local function. */
5352 frame->padding1 = ((offset + stack_alignment_needed - 1)
5353 & -stack_alignment_needed) - offset;
5355 offset += frame->padding1;
5357 /* Frame pointer points here. */
5358 frame->frame_pointer_offset = offset;
5360 offset += size;
5362 /* Add outgoing arguments area. Can be skipped if we eliminated
5363 all the function calls as dead code.
5364 Skipping is however impossible when the function calls alloca, as the
5365 alloca expander assumes that the last current_function_outgoing_args_size
5366 bytes of the stack frame are unused. */
5367 if (ACCUMULATE_OUTGOING_ARGS
5368 && (!current_function_is_leaf || current_function_calls_alloca
5369 || ix86_current_function_calls_tls_descriptor))
5371 offset += current_function_outgoing_args_size;
5372 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5374 else
5375 frame->outgoing_arguments_size = 0;
5377 /* Align stack boundary. Only needed if we're calling another function
5378 or using alloca. */
5379 if (!current_function_is_leaf || current_function_calls_alloca
5380 || ix86_current_function_calls_tls_descriptor)
5381 frame->padding2 = ((offset + preferred_alignment - 1)
5382 & -preferred_alignment) - offset;
5383 else
5384 frame->padding2 = 0;
5386 offset += frame->padding2;
5388 /* We've reached end of stack frame. */
5389 frame->stack_pointer_offset = offset;
5391 /* Size prologue needs to allocate. */
5392 frame->to_allocate =
5393 (size + frame->padding1 + frame->padding2
5394 + frame->outgoing_arguments_size + frame->va_arg_size);
5396 if ((!frame->to_allocate && frame->nregs <= 1)
5397 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5398 frame->save_regs_using_mov = false;
5400 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5401 && current_function_is_leaf
5402 && !ix86_current_function_calls_tls_descriptor)
5404 frame->red_zone_size = frame->to_allocate;
5405 if (frame->save_regs_using_mov)
5406 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5407 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5408 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5410 else
5411 frame->red_zone_size = 0;
5412 frame->to_allocate -= frame->red_zone_size;
5413 frame->stack_pointer_offset -= frame->red_zone_size;
5414 #if 0
5415 fprintf (stderr, "nregs: %i\n", frame->nregs);
5416 fprintf (stderr, "size: %i\n", size);
5417 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5418 fprintf (stderr, "padding1: %i\n", frame->padding1);
5419 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5420 fprintf (stderr, "padding2: %i\n", frame->padding2);
5421 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5422 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5423 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5424 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5425 frame->hard_frame_pointer_offset);
5426 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5427 #endif
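/* Worked example (not part of the original source): the padding fields above
   use the usual round-up-to-alignment idiom
   ((offset + align - 1) & -align) - offset; e.g. for offset == 20 and a
   16 byte alignment this yields (35 & -16) - 20 == 32 - 20 == 12 bytes of
   padding, bringing the offset up to the next 16 byte boundary.  */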
5430 /* Emit code to save registers in the prologue. */
5432 static void
5433 ix86_emit_save_regs (void)
5435 unsigned int regno;
5436 rtx insn;
5438 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5439 if (ix86_save_reg (regno, true))
5441 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5442 RTX_FRAME_RELATED_P (insn) = 1;
5446 /* Emit code to save registers using MOV insns. The first register
5447 is stored at POINTER + OFFSET. */
5448 static void
5449 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5451 unsigned int regno;
5452 rtx insn;
5454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5455 if (ix86_save_reg (regno, true))
5457 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5458 Pmode, offset),
5459 gen_rtx_REG (Pmode, regno));
5460 RTX_FRAME_RELATED_P (insn) = 1;
5461 offset += UNITS_PER_WORD;
5465 /* Expand prologue or epilogue stack adjustment.
5466 The pattern exists to put a dependency on all ebp-based memory accesses.
5467 STYLE should be negative if instructions should be marked as frame related,
5468 zero if the %r11 register is live and cannot be freely used, and positive
5469 otherwise. */
5471 static void
5472 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5474 rtx insn;
5476 if (! TARGET_64BIT)
5477 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5478 else if (x86_64_immediate_operand (offset, DImode))
5479 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5480 else
5482 rtx r11;
5483 /* r11 is used by indirect sibcall return as well, set before the
5484 epilogue and used after the epilogue. ATM indirect sibcall
5485 shouldn't be used together with huge frame sizes in one
5486 function because of the frame_size check in sibcall.c. */
5487 gcc_assert (style);
5488 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5489 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5490 if (style < 0)
5491 RTX_FRAME_RELATED_P (insn) = 1;
5492 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5493 offset));
5495 if (style < 0)
5496 RTX_FRAME_RELATED_P (insn) = 1;
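/* Illustrative sketch (not part of the original source, the constant is
   hypothetical): on x86_64, when the adjustment does not fit in a signed
   32-bit immediate, the code above first materializes it in %r11, so the
   emitted assembly would resemble

       movabsq $0x100000000, %r11
       addq    %r11, %rsp

   rather than a single add with an immediate operand.  */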
5499 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5501 static rtx
5502 ix86_internal_arg_pointer (void)
5504 bool has_force_align_arg_pointer =
5505 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5506 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5507 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5508 && DECL_NAME (current_function_decl)
5509 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5510 && DECL_FILE_SCOPE_P (current_function_decl))
5511 || ix86_force_align_arg_pointer
5512 || has_force_align_arg_pointer)
5514 /* Nested functions can't realign the stack due to a register
5515 conflict. */
5516 if (DECL_CONTEXT (current_function_decl)
5517 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5519 if (ix86_force_align_arg_pointer)
5520 warning (0, "-mstackrealign ignored for nested functions");
5521 if (has_force_align_arg_pointer)
5522 error ("%s not supported for nested functions",
5523 ix86_force_align_arg_pointer_string);
5524 return virtual_incoming_args_rtx;
5526 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5527 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5529 else
5530 return virtual_incoming_args_rtx;
5533 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5534 This is called from dwarf2out.c to emit call frame instructions
5535 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5536 static void
5537 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5539 rtx unspec = SET_SRC (pattern);
5540 gcc_assert (GET_CODE (unspec) == UNSPEC);
5542 switch (index)
5544 case UNSPEC_REG_SAVE:
5545 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5546 SET_DEST (pattern));
5547 break;
5548 case UNSPEC_DEF_CFA:
5549 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5550 INTVAL (XVECEXP (unspec, 0, 0)));
5551 break;
5552 default:
5553 gcc_unreachable ();
5557 /* Expand the prologue into a bunch of separate insns. */
5559 void
5560 ix86_expand_prologue (void)
5562 rtx insn;
5563 bool pic_reg_used;
5564 struct ix86_frame frame;
5565 HOST_WIDE_INT allocate;
5567 ix86_compute_frame_layout (&frame);
5569 if (cfun->machine->force_align_arg_pointer)
5571 rtx x, y;
5573 /* Grab the argument pointer. */
5574 x = plus_constant (stack_pointer_rtx, 4);
5575 y = cfun->machine->force_align_arg_pointer;
5576 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5577 RTX_FRAME_RELATED_P (insn) = 1;
5579 /* The unwind info consists of two parts: install the fafp as the cfa,
5580 and record the fafp as the "save register" of the stack pointer.
5581 The latter is there so that the unwinder can see where it
5582 should restore the stack pointer across the and insn. */
5583 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5584 x = gen_rtx_SET (VOIDmode, y, x);
5585 RTX_FRAME_RELATED_P (x) = 1;
5586 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5587 UNSPEC_REG_SAVE);
5588 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5589 RTX_FRAME_RELATED_P (y) = 1;
5590 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5591 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5592 REG_NOTES (insn) = x;
5594 /* Align the stack. */
5595 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5596 GEN_INT (-16)));
5598 /* And here we cheat like madmen with the unwind info. We force the
5599 cfa register back to sp+4, which is exactly what it was at the
5600 start of the function. Re-pushing the return address results in
5601 the return address being at the same spot relative to the cfa, and thus is
5602 correct wrt the unwind info. */
5603 x = cfun->machine->force_align_arg_pointer;
5604 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5605 insn = emit_insn (gen_push (x));
5606 RTX_FRAME_RELATED_P (insn) = 1;
5608 x = GEN_INT (4);
5609 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5610 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5611 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5612 REG_NOTES (insn) = x;
5615 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5616 slower on all targets. Also sdb doesn't like it. */
5618 if (frame_pointer_needed)
5620 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5621 RTX_FRAME_RELATED_P (insn) = 1;
5623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5624 RTX_FRAME_RELATED_P (insn) = 1;
5627 allocate = frame.to_allocate;
5629 if (!frame.save_regs_using_mov)
5630 ix86_emit_save_regs ();
5631 else
5632 allocate += frame.nregs * UNITS_PER_WORD;
5634 /* When using the red zone we may start saving registers before allocating
5635 the stack frame, saving one cycle of the prologue. */
5636 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5637 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5638 : stack_pointer_rtx,
5639 -frame.nregs * UNITS_PER_WORD);
5641 if (allocate == 0)
5643 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5644 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5645 GEN_INT (-allocate), -1);
5646 else
5648 /* Only valid for Win32. */
5649 rtx eax = gen_rtx_REG (SImode, 0);
5650 bool eax_live = ix86_eax_live_at_start_p ();
5651 rtx t;
5653 gcc_assert (!TARGET_64BIT);
5655 if (eax_live)
5657 emit_insn (gen_push (eax));
5658 allocate -= 4;
5661 emit_move_insn (eax, GEN_INT (allocate));
5663 insn = emit_insn (gen_allocate_stack_worker (eax));
5664 RTX_FRAME_RELATED_P (insn) = 1;
5665 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5666 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5667 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5668 t, REG_NOTES (insn));
5670 if (eax_live)
5672 if (frame_pointer_needed)
5673 t = plus_constant (hard_frame_pointer_rtx,
5674 allocate
5675 - frame.to_allocate
5676 - frame.nregs * UNITS_PER_WORD);
5677 else
5678 t = plus_constant (stack_pointer_rtx, allocate);
5679 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5683 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5685 if (!frame_pointer_needed || !frame.to_allocate)
5686 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5687 else
5688 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5689 -frame.nregs * UNITS_PER_WORD);
5692 pic_reg_used = false;
5693 if (pic_offset_table_rtx
5694 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5695 || current_function_profile))
5697 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5699 if (alt_pic_reg_used != INVALID_REGNUM)
5700 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5702 pic_reg_used = true;
5705 if (pic_reg_used)
5707 if (TARGET_64BIT)
5708 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5709 else
5710 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5712 /* Even with accurate pre-reload life analysis, we can wind up
5713 deleting all references to the pic register after reload.
5714 Consider if cross-jumping unifies two sides of a branch
5715 controlled by a comparison vs the only read from a global.
5716 In which case, allow the set_got to be deleted, though we're
5717 too late to do anything about the ebx save in the prologue. */
5718 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5721 /* Prevent function calls from being scheduled before the call to mcount.
5722 In the pic_reg_used case, make sure that the got load isn't deleted. */
5723 if (current_function_profile)
5724 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
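/* Illustrative sketch (not part of the original source, N is hypothetical):
   for a typical 32-bit function with a frame pointer and two call-saved
   registers the insns emitted above correspond to assembly along the lines of

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx
       pushl   %esi
       subl    $N, %esp          # frame.to_allocate bytes

   with the use of moves instead of pushes depending on the
   save_regs_using_mov and red-zone decisions made in
   ix86_compute_frame_layout.  */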
5727 /* Emit code to restore saved registers using MOV insns. First register
5728 is restored from POINTER + OFFSET. */
5729 static void
5730 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5731 int maybe_eh_return)
5733 int regno;
5734 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5736 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5737 if (ix86_save_reg (regno, maybe_eh_return))
5739 /* Ensure that adjust_address won't be forced to produce a pointer
5740 out of the range allowed by the x86-64 instruction set. */
5741 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5743 rtx r11;
5745 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5746 emit_move_insn (r11, GEN_INT (offset));
5747 emit_insn (gen_adddi3 (r11, r11, pointer));
5748 base_address = gen_rtx_MEM (Pmode, r11);
5749 offset = 0;
5751 emit_move_insn (gen_rtx_REG (Pmode, regno),
5752 adjust_address (base_address, Pmode, offset));
5753 offset += UNITS_PER_WORD;
5757 /* Restore function stack, frame, and registers. */
5759 void
5760 ix86_expand_epilogue (int style)
5762 int regno;
5763 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5764 struct ix86_frame frame;
5765 HOST_WIDE_INT offset;
5767 ix86_compute_frame_layout (&frame);
5769 /* Calculate start of saved registers relative to ebp. Special care
5770 must be taken for the normal return case of a function using
5771 eh_return: the eax and edx registers are marked as saved, but not
5772 restored along this path. */
5773 offset = frame.nregs;
5774 if (current_function_calls_eh_return && style != 2)
5775 offset -= 2;
5776 offset *= -UNITS_PER_WORD;
5778 /* If we're only restoring one register and sp is not valid then
5779 use a move instruction to restore the register, since it's
5780 less work than reloading sp and popping the register.
5782 The default code results in a stack adjustment using an add/lea instruction,
5783 while this code results in a LEAVE instruction (or discrete equivalent),
5784 so it is profitable in some other cases as well, especially when there
5785 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5786 and there is exactly one register to pop. This heuristic may need some
5787 tuning in the future. */
5788 if ((!sp_valid && frame.nregs <= 1)
5789 || (TARGET_EPILOGUE_USING_MOVE
5790 && cfun->machine->use_fast_prologue_epilogue
5791 && (frame.nregs > 1 || frame.to_allocate))
5792 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5793 || (frame_pointer_needed && TARGET_USE_LEAVE
5794 && cfun->machine->use_fast_prologue_epilogue
5795 && frame.nregs == 1)
5796 || current_function_calls_eh_return)
5798 /* Restore registers. We can use ebp or esp to address the memory
5799 locations. If both are available, default to ebp, since offsets
5800 are known to be small. The only exception is esp pointing directly to the
5801 end of the block of saved registers, where we may simplify the addressing
5802 mode. */
5804 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5805 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5806 frame.to_allocate, style == 2);
5807 else
5808 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5809 offset, style == 2);
5811 /* eh_return epilogues need %ecx added to the stack pointer. */
5812 if (style == 2)
5814 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5816 if (frame_pointer_needed)
5818 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5819 tmp = plus_constant (tmp, UNITS_PER_WORD);
5820 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5822 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5823 emit_move_insn (hard_frame_pointer_rtx, tmp);
5825 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5826 const0_rtx, style);
5828 else
5830 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5831 tmp = plus_constant (tmp, (frame.to_allocate
5832 + frame.nregs * UNITS_PER_WORD));
5833 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5836 else if (!frame_pointer_needed)
5837 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5838 GEN_INT (frame.to_allocate
5839 + frame.nregs * UNITS_PER_WORD),
5840 style);
5841 /* If not an i386, mov & pop is faster than "leave". */
5842 else if (TARGET_USE_LEAVE || optimize_size
5843 || !cfun->machine->use_fast_prologue_epilogue)
5844 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5845 else
5847 pro_epilogue_adjust_stack (stack_pointer_rtx,
5848 hard_frame_pointer_rtx,
5849 const0_rtx, style);
5850 if (TARGET_64BIT)
5851 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5852 else
5853 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5856 else
5858 /* First step is to deallocate the stack frame so that we can
5859 pop the registers. */
5860 if (!sp_valid)
5862 gcc_assert (frame_pointer_needed);
5863 pro_epilogue_adjust_stack (stack_pointer_rtx,
5864 hard_frame_pointer_rtx,
5865 GEN_INT (offset), style);
5867 else if (frame.to_allocate)
5868 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5869 GEN_INT (frame.to_allocate), style);
5871 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5872 if (ix86_save_reg (regno, false))
5874 if (TARGET_64BIT)
5875 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5876 else
5877 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5879 if (frame_pointer_needed)
5881 /* Leave results in shorter dependency chains on CPUs that are
5882 able to grok it fast. */
5883 if (TARGET_USE_LEAVE)
5884 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5885 else if (TARGET_64BIT)
5886 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5887 else
5888 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5892 if (cfun->machine->force_align_arg_pointer)
5894 emit_insn (gen_addsi3 (stack_pointer_rtx,
5895 cfun->machine->force_align_arg_pointer,
5896 GEN_INT (-4)));
5899 /* Sibcall epilogues don't want a return instruction. */
5900 if (style == 0)
5901 return;
5903 if (current_function_pops_args && current_function_args_size)
5905 rtx popc = GEN_INT (current_function_pops_args);
5907 /* i386 can only pop 64K bytes. If asked to pop more, pop
5908 return address, do explicit add, and jump indirectly to the
5909 caller. */
5911 if (current_function_pops_args >= 65536)
5913 rtx ecx = gen_rtx_REG (SImode, 2);
5915 /* There is no "pascal" calling convention in 64bit ABI. */
5916 gcc_assert (!TARGET_64BIT);
5918 emit_insn (gen_popsi1 (ecx));
5919 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5920 emit_jump_insn (gen_return_indirect_internal (ecx));
5922 else
5923 emit_jump_insn (gen_return_pop_internal (popc));
5925 else
5926 emit_jump_insn (gen_return_internal ());
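/* Illustrative sketch (not part of the original source, N is hypothetical):
   the common epilogue shapes produced above are either

       leave                      # or: addl $N, %esp / popl ...
       ret

   or, when current_function_pops_args is set (e.g. stdcall callees),

       ret     $N                 # pop N bytes of arguments

   with the >= 64K argument-pop case falling back to popping the return
   address into %ecx, adjusting %esp explicitly and jumping through %ecx.  */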
5929 /* Reset from the function's potential modifications. */
5931 static void
5932 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5933 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5935 if (pic_offset_table_rtx)
5936 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5937 #if TARGET_MACHO
5938 /* Mach-O doesn't support labels at the end of objects, so if
5939 it looks like we might want one, insert a NOP. */
5941 rtx insn = get_last_insn ();
5942 while (insn
5943 && NOTE_P (insn)
5944 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5945 insn = PREV_INSN (insn);
5946 if (insn
5947 && (LABEL_P (insn)
5948 || (NOTE_P (insn)
5949 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5950 fputs ("\tnop\n", file);
5952 #endif
5956 /* Extract the parts of an RTL expression that is a valid memory address
5957 for an instruction. Return 0 if the structure of the address is
5958 grossly off. Return -1 if the address contains ASHIFT, so it is not
5959 strictly valid, but still used for computing the length of the lea instruction. */
5962 ix86_decompose_address (rtx addr, struct ix86_address *out)
5964 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5965 rtx base_reg, index_reg;
5966 HOST_WIDE_INT scale = 1;
5967 rtx scale_rtx = NULL_RTX;
5968 int retval = 1;
5969 enum ix86_address_seg seg = SEG_DEFAULT;
5971 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5972 base = addr;
5973 else if (GET_CODE (addr) == PLUS)
5975 rtx addends[4], op;
5976 int n = 0, i;
5978 op = addr;
5981 if (n >= 4)
5982 return 0;
5983 addends[n++] = XEXP (op, 1);
5984 op = XEXP (op, 0);
5986 while (GET_CODE (op) == PLUS);
5987 if (n >= 4)
5988 return 0;
5989 addends[n] = op;
5991 for (i = n; i >= 0; --i)
5993 op = addends[i];
5994 switch (GET_CODE (op))
5996 case MULT:
5997 if (index)
5998 return 0;
5999 index = XEXP (op, 0);
6000 scale_rtx = XEXP (op, 1);
6001 break;
6003 case UNSPEC:
6004 if (XINT (op, 1) == UNSPEC_TP
6005 && TARGET_TLS_DIRECT_SEG_REFS
6006 && seg == SEG_DEFAULT)
6007 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6008 else
6009 return 0;
6010 break;
6012 case REG:
6013 case SUBREG:
6014 if (!base)
6015 base = op;
6016 else if (!index)
6017 index = op;
6018 else
6019 return 0;
6020 break;
6022 case CONST:
6023 case CONST_INT:
6024 case SYMBOL_REF:
6025 case LABEL_REF:
6026 if (disp)
6027 return 0;
6028 disp = op;
6029 break;
6031 default:
6032 return 0;
6036 else if (GET_CODE (addr) == MULT)
6038 index = XEXP (addr, 0); /* index*scale */
6039 scale_rtx = XEXP (addr, 1);
6041 else if (GET_CODE (addr) == ASHIFT)
6043 rtx tmp;
6045 /* We're called for lea too, which implements ashift on occasion. */
6046 index = XEXP (addr, 0);
6047 tmp = XEXP (addr, 1);
6048 if (GET_CODE (tmp) != CONST_INT)
6049 return 0;
6050 scale = INTVAL (tmp);
6051 if ((unsigned HOST_WIDE_INT) scale > 3)
6052 return 0;
6053 scale = 1 << scale;
6054 retval = -1;
6056 else
6057 disp = addr; /* displacement */
6059 /* Extract the integral value of scale. */
6060 if (scale_rtx)
6062 if (GET_CODE (scale_rtx) != CONST_INT)
6063 return 0;
6064 scale = INTVAL (scale_rtx);
6067 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6068 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6070 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6071 if (base_reg && index_reg && scale == 1
6072 && (index_reg == arg_pointer_rtx
6073 || index_reg == frame_pointer_rtx
6074 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6076 rtx tmp;
6077 tmp = base, base = index, index = tmp;
6078 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6081 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6082 if ((base_reg == hard_frame_pointer_rtx
6083 || base_reg == frame_pointer_rtx
6084 || base_reg == arg_pointer_rtx) && !disp)
6085 disp = const0_rtx;
6087 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6088 Avoid this by transforming to [%esi+0]. */
6089 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6090 && base_reg && !index_reg && !disp
6091 && REG_P (base_reg)
6092 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6093 disp = const0_rtx;
6095 /* Special case: encode reg+reg instead of reg*2. */
6096 if (!base && index && scale && scale == 2)
6097 base = index, base_reg = index_reg, scale = 1;
6099 /* Special case: scaling cannot be encoded without base or displacement. */
6100 if (!base && !disp && index && scale != 1)
6101 disp = const0_rtx;
6103 out->base = base;
6104 out->index = index;
6105 out->disp = disp;
6106 out->scale = scale;
6107 out->seg = seg;
6109 return retval;
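/* Illustrative note (not part of the original source): given the address
   (plus (plus (reg %ebx) (mult (reg %ecx) (const_int 4))) (const_int 12))
   the loop above would fill OUT with base = %ebx, index = %ecx, scale = 4
   and disp = 12, i.e. the operand printed as 12(%ebx,%ecx,4).  */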
6112 /* Return cost of the memory address x.
6113 For i386, it is better to use a complex address than let gcc copy
6114 the address into a reg and make a new pseudo. But not if the address
6115 requires two regs - that would mean more pseudos with longer
6116 lifetimes. */
6117 static int
6118 ix86_address_cost (rtx x)
6120 struct ix86_address parts;
6121 int cost = 1;
6122 int ok = ix86_decompose_address (x, &parts);
6124 gcc_assert (ok);
6126 if (parts.base && GET_CODE (parts.base) == SUBREG)
6127 parts.base = SUBREG_REG (parts.base);
6128 if (parts.index && GET_CODE (parts.index) == SUBREG)
6129 parts.index = SUBREG_REG (parts.index);
6131 /* More complex memory references are better. */
6132 if (parts.disp && parts.disp != const0_rtx)
6133 cost--;
6134 if (parts.seg != SEG_DEFAULT)
6135 cost--;
6137 /* Attempt to minimize number of registers in the address. */
6138 if ((parts.base
6139 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6140 || (parts.index
6141 && (!REG_P (parts.index)
6142 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6143 cost++;
6145 if (parts.base
6146 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6147 && parts.index
6148 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6149 && parts.base != parts.index)
6150 cost++;
6152 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6153 since its predecode logic can't detect the length of instructions
6154 and it degenerates to vector decoded. Increase the cost of such
6155 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6156 to split such addresses or even refuse such addresses at all.
6158 The following addressing modes are affected:
6159 [base+scale*index]
6160 [scale*index+disp]
6161 [base+index]
6163 The first and last cases may be avoidable by explicitly coding the zero in
6164 the memory address, but I don't have an AMD-K6 machine handy to check this
6165 theory. */
6167 if (TARGET_K6
6168 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6169 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6170 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6171 cost += 10;
6173 return cost;
6176 /* If X is a machine specific address (i.e. a symbol or label being
6177 referenced as a displacement from the GOT implemented using an
6178 UNSPEC), then return the base term. Otherwise return X. */
6181 ix86_find_base_term (rtx x)
6183 rtx term;
6185 if (TARGET_64BIT)
6187 if (GET_CODE (x) != CONST)
6188 return x;
6189 term = XEXP (x, 0);
6190 if (GET_CODE (term) == PLUS
6191 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6192 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6193 term = XEXP (term, 0);
6194 if (GET_CODE (term) != UNSPEC
6195 || XINT (term, 1) != UNSPEC_GOTPCREL)
6196 return x;
6198 term = XVECEXP (term, 0, 0);
6200 if (GET_CODE (term) != SYMBOL_REF
6201 && GET_CODE (term) != LABEL_REF)
6202 return x;
6204 return term;
6207 term = ix86_delegitimize_address (x);
6209 if (GET_CODE (term) != SYMBOL_REF
6210 && GET_CODE (term) != LABEL_REF)
6211 return x;
6213 return term;
6216 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6217 this is used to form addresses to local data when -fPIC is in
6218 use. */
6220 static bool
6221 darwin_local_data_pic (rtx disp)
6223 if (GET_CODE (disp) == MINUS)
6225 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6226 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6227 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6229 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6230 if (! strcmp (sym_name, "<pic base>"))
6231 return true;
6235 return false;
6238 /* Determine if a given RTX is a valid constant. We already know this
6239 satisfies CONSTANT_P. */
6241 bool
6242 legitimate_constant_p (rtx x)
6244 switch (GET_CODE (x))
6246 case CONST:
6247 x = XEXP (x, 0);
6249 if (GET_CODE (x) == PLUS)
6251 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6252 return false;
6253 x = XEXP (x, 0);
6256 if (TARGET_MACHO && darwin_local_data_pic (x))
6257 return true;
6259 /* Only some unspecs are valid as "constants". */
6260 if (GET_CODE (x) == UNSPEC)
6261 switch (XINT (x, 1))
6263 case UNSPEC_GOTOFF:
6264 return TARGET_64BIT;
6265 case UNSPEC_TPOFF:
6266 case UNSPEC_NTPOFF:
6267 x = XVECEXP (x, 0, 0);
6268 return (GET_CODE (x) == SYMBOL_REF
6269 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6270 case UNSPEC_DTPOFF:
6271 x = XVECEXP (x, 0, 0);
6272 return (GET_CODE (x) == SYMBOL_REF
6273 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6274 default:
6275 return false;
6278 /* We must have drilled down to a symbol. */
6279 if (GET_CODE (x) == LABEL_REF)
6280 return true;
6281 if (GET_CODE (x) != SYMBOL_REF)
6282 return false;
6283 /* FALLTHRU */
6285 case SYMBOL_REF:
6286 /* TLS symbols are never valid. */
6287 if (SYMBOL_REF_TLS_MODEL (x))
6288 return false;
6289 break;
6291 case CONST_DOUBLE:
6292 if (GET_MODE (x) == TImode
6293 && x != CONST0_RTX (TImode)
6294 && !TARGET_64BIT)
6295 return false;
6296 break;
6298 case CONST_VECTOR:
6299 if (x == CONST0_RTX (GET_MODE (x)))
6300 return true;
6301 return false;
6303 default:
6304 break;
6307 /* Otherwise we handle everything else in the move patterns. */
6308 return true;
6311 /* Determine if it's legal to put X into the constant pool. This
6312 is not possible for the address of thread-local symbols, which
6313 is checked above. */
6315 static bool
6316 ix86_cannot_force_const_mem (rtx x)
6318 /* We can always put integral constants and vectors in memory. */
6319 switch (GET_CODE (x))
6321 case CONST_INT:
6322 case CONST_DOUBLE:
6323 case CONST_VECTOR:
6324 return false;
6326 default:
6327 break;
6329 return !legitimate_constant_p (x);
6332 /* Determine if a given RTX is a valid constant address. */
6334 bool
6335 constant_address_p (rtx x)
6337 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6340 /* Nonzero if the constant value X is a legitimate general operand
6341 when generating PIC code. It is given that flag_pic is on and
6342 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6344 bool
6345 legitimate_pic_operand_p (rtx x)
6347 rtx inner;
6349 switch (GET_CODE (x))
6351 case CONST:
6352 inner = XEXP (x, 0);
6353 if (GET_CODE (inner) == PLUS
6354 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6355 inner = XEXP (inner, 0);
6357 /* Only some unspecs are valid as "constants". */
6358 if (GET_CODE (inner) == UNSPEC)
6359 switch (XINT (inner, 1))
6361 case UNSPEC_GOTOFF:
6362 return TARGET_64BIT;
6363 case UNSPEC_TPOFF:
6364 x = XVECEXP (inner, 0, 0);
6365 return (GET_CODE (x) == SYMBOL_REF
6366 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6367 default:
6368 return false;
6370 /* FALLTHRU */
6372 case SYMBOL_REF:
6373 case LABEL_REF:
6374 return legitimate_pic_address_disp_p (x);
6376 default:
6377 return true;
6381 /* Determine if a given CONST RTX is a valid memory displacement
6382 in PIC mode. */
6385 legitimate_pic_address_disp_p (rtx disp)
6387 bool saw_plus;
6389 /* In 64bit mode we can allow direct addresses of symbols and labels
6390 when they are not dynamic symbols. */
6391 if (TARGET_64BIT)
6393 rtx op0 = disp, op1;
6395 switch (GET_CODE (disp))
6397 case LABEL_REF:
6398 return true;
6400 case CONST:
6401 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6402 break;
6403 op0 = XEXP (XEXP (disp, 0), 0);
6404 op1 = XEXP (XEXP (disp, 0), 1);
6405 if (GET_CODE (op1) != CONST_INT
6406 || INTVAL (op1) >= 16*1024*1024
6407 || INTVAL (op1) < -16*1024*1024)
6408 break;
6409 if (GET_CODE (op0) == LABEL_REF)
6410 return true;
6411 if (GET_CODE (op0) != SYMBOL_REF)
6412 break;
6413 /* FALLTHRU */
6415 case SYMBOL_REF:
6416 /* TLS references should always be enclosed in UNSPEC. */
6417 if (SYMBOL_REF_TLS_MODEL (op0))
6418 return false;
6419 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6420 return true;
6421 break;
6423 default:
6424 break;
6427 if (GET_CODE (disp) != CONST)
6428 return 0;
6429 disp = XEXP (disp, 0);
6431 if (TARGET_64BIT)
6433 /* It is unsafe to allow PLUS expressions here, given the limited allowed
6434 distance of GOT tables. We should not need these anyway. */
6435 if (GET_CODE (disp) != UNSPEC
6436 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6437 && XINT (disp, 1) != UNSPEC_GOTOFF))
6438 return 0;
6440 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6441 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6442 return 0;
6443 return 1;
6446 saw_plus = false;
6447 if (GET_CODE (disp) == PLUS)
6449 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6450 return 0;
6451 disp = XEXP (disp, 0);
6452 saw_plus = true;
6455 if (TARGET_MACHO && darwin_local_data_pic (disp))
6456 return 1;
6458 if (GET_CODE (disp) != UNSPEC)
6459 return 0;
6461 switch (XINT (disp, 1))
6463 case UNSPEC_GOT:
6464 if (saw_plus)
6465 return false;
6466 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6467 case UNSPEC_GOTOFF:
6468 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6469 While the ABI also specifies a 32bit relocation, we don't produce it in
6470 the small PIC model at all. */
6471 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6472 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6473 && !TARGET_64BIT)
6474 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6475 return false;
6476 case UNSPEC_GOTTPOFF:
6477 case UNSPEC_GOTNTPOFF:
6478 case UNSPEC_INDNTPOFF:
6479 if (saw_plus)
6480 return false;
6481 disp = XVECEXP (disp, 0, 0);
6482 return (GET_CODE (disp) == SYMBOL_REF
6483 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6484 case UNSPEC_NTPOFF:
6485 disp = XVECEXP (disp, 0, 0);
6486 return (GET_CODE (disp) == SYMBOL_REF
6487 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6488 case UNSPEC_DTPOFF:
6489 disp = XVECEXP (disp, 0, 0);
6490 return (GET_CODE (disp) == SYMBOL_REF
6491 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6494 return 0;
6497 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6498 memory address for an instruction. The MODE argument is the machine mode
6499 for the MEM expression that wants to use this address.
6501 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6502 convert common non-canonical forms to canonical form so that they will
6503 be recognized. */
6506 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6508 struct ix86_address parts;
6509 rtx base, index, disp;
6510 HOST_WIDE_INT scale;
6511 const char *reason = NULL;
6512 rtx reason_rtx = NULL_RTX;
6514 if (TARGET_DEBUG_ADDR)
6516 fprintf (stderr,
6517 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6518 GET_MODE_NAME (mode), strict);
6519 debug_rtx (addr);
6522 if (ix86_decompose_address (addr, &parts) <= 0)
6524 reason = "decomposition failed";
6525 goto report_error;
6528 base = parts.base;
6529 index = parts.index;
6530 disp = parts.disp;
6531 scale = parts.scale;
6533 /* Validate base register.
6535 Don't allow SUBREG's that span more than a word here. It can lead to spill
6536 failures when the base is one word out of a two word structure, which is
6537 represented internally as a DImode int. */
6539 if (base)
6541 rtx reg;
6542 reason_rtx = base;
6544 if (REG_P (base))
6545 reg = base;
6546 else if (GET_CODE (base) == SUBREG
6547 && REG_P (SUBREG_REG (base))
6548 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6549 <= UNITS_PER_WORD)
6550 reg = SUBREG_REG (base);
6551 else
6553 reason = "base is not a register";
6554 goto report_error;
6557 if (GET_MODE (base) != Pmode)
6559 reason = "base is not in Pmode";
6560 goto report_error;
6563 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6564 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6566 reason = "base is not valid";
6567 goto report_error;
6571 /* Validate index register.
6573 Don't allow SUBREG's that span more than a word here -- same as above. */
6575 if (index)
6577 rtx reg;
6578 reason_rtx = index;
6580 if (REG_P (index))
6581 reg = index;
6582 else if (GET_CODE (index) == SUBREG
6583 && REG_P (SUBREG_REG (index))
6584 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6585 <= UNITS_PER_WORD)
6586 reg = SUBREG_REG (index);
6587 else
6589 reason = "index is not a register";
6590 goto report_error;
6593 if (GET_MODE (index) != Pmode)
6595 reason = "index is not in Pmode";
6596 goto report_error;
6599 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6600 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6602 reason = "index is not valid";
6603 goto report_error;
6607 /* Validate scale factor. */
6608 if (scale != 1)
6610 reason_rtx = GEN_INT (scale);
6611 if (!index)
6613 reason = "scale without index";
6614 goto report_error;
6617 if (scale != 2 && scale != 4 && scale != 8)
6619 reason = "scale is not a valid multiplier";
6620 goto report_error;
6624 /* Validate displacement. */
6625 if (disp)
6627 reason_rtx = disp;
6629 if (GET_CODE (disp) == CONST
6630 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6631 switch (XINT (XEXP (disp, 0), 1))
6633 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6634 used. While the ABI also specifies 32bit relocations, we don't produce
6635 them at all and use IP relative addressing instead. */
6636 case UNSPEC_GOT:
6637 case UNSPEC_GOTOFF:
6638 gcc_assert (flag_pic);
6639 if (!TARGET_64BIT)
6640 goto is_legitimate_pic;
6641 reason = "64bit address unspec";
6642 goto report_error;
6644 case UNSPEC_GOTPCREL:
6645 gcc_assert (flag_pic);
6646 goto is_legitimate_pic;
6648 case UNSPEC_GOTTPOFF:
6649 case UNSPEC_GOTNTPOFF:
6650 case UNSPEC_INDNTPOFF:
6651 case UNSPEC_NTPOFF:
6652 case UNSPEC_DTPOFF:
6653 break;
6655 default:
6656 reason = "invalid address unspec";
6657 goto report_error;
6660 else if (SYMBOLIC_CONST (disp)
6661 && (flag_pic
6662 || (TARGET_MACHO
6663 #if TARGET_MACHO
6664 && MACHOPIC_INDIRECT
6665 && !machopic_operand_p (disp)
6666 #endif
6670 is_legitimate_pic:
6671 if (TARGET_64BIT && (index || base))
6673 /* foo@dtpoff(%rX) is ok. */
6674 if (GET_CODE (disp) != CONST
6675 || GET_CODE (XEXP (disp, 0)) != PLUS
6676 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6677 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6678 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6679 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6681 reason = "non-constant pic memory reference";
6682 goto report_error;
6685 else if (! legitimate_pic_address_disp_p (disp))
6687 reason = "displacement is an invalid pic construct";
6688 goto report_error;
6691 /* This code used to verify that a symbolic pic displacement
6692 includes the pic_offset_table_rtx register.
6694 While this is a good idea, unfortunately these constructs may
6695 be created by the "adds using lea" optimization for incorrect
6696 code like:
6698 int a;
6699 int foo(int i)
6701 return *(&a+i);
6704 This code is nonsensical, but results in addressing the
6705 GOT table with a pic_offset_table_rtx base. We can't
6706 just refuse it easily, since it gets matched by the
6707 "addsi3" pattern, which later gets split to lea in case the
6708 output register differs from the input. While this
6709 could be handled by a separate addsi pattern for this case
6710 that never results in lea, disabling this test seems to be the
6711 easier and correct fix for the crash. */
6713 else if (GET_CODE (disp) != LABEL_REF
6714 && GET_CODE (disp) != CONST_INT
6715 && (GET_CODE (disp) != CONST
6716 || !legitimate_constant_p (disp))
6717 && (GET_CODE (disp) != SYMBOL_REF
6718 || !legitimate_constant_p (disp)))
6720 reason = "displacement is not constant";
6721 goto report_error;
6723 else if (TARGET_64BIT
6724 && !x86_64_immediate_operand (disp, VOIDmode))
6726 reason = "displacement is out of range";
6727 goto report_error;
6731 /* Everything looks valid. */
6732 if (TARGET_DEBUG_ADDR)
6733 fprintf (stderr, "Success.\n");
6734 return TRUE;
6736 report_error:
6737 if (TARGET_DEBUG_ADDR)
6739 fprintf (stderr, "Error: %s\n", reason);
6740 debug_rtx (reason_rtx);
6742 return FALSE;
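/* Illustrative note (not part of the original source): under the checks above
   an address such as 12(%ebx,%ecx,4) is accepted (valid base, index, scale 4
   and constant displacement), while e.g. a scale of 3, a scale without an
   index, or a bare symbolic displacement under -fpic that is not wrapped in
   one of the recognized unspecs would all take the report_error path.  */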
6745 /* Return a unique alias set for the GOT. */
6747 static HOST_WIDE_INT
6748 ix86_GOT_alias_set (void)
6750 static HOST_WIDE_INT set = -1;
6751 if (set == -1)
6752 set = new_alias_set ();
6753 return set;
6756 /* Return a legitimate reference for ORIG (an address) using the
6757 register REG. If REG is 0, a new pseudo is generated.
6759 There are two types of references that must be handled:
6761 1. Global data references must load the address from the GOT, via
6762 the PIC reg. An insn is emitted to do this load, and the reg is
6763 returned.
6765 2. Static data references, constant pool addresses, and code labels
6766 compute the address as an offset from the GOT, whose base is in
6767 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6768 differentiate them from global data objects. The returned
6769 address is the PIC reg + an unspec constant.
6771 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6772 reg also appears in the address. */
6774 static rtx
6775 legitimize_pic_address (rtx orig, rtx reg)
6777 rtx addr = orig;
6778 rtx new = orig;
6779 rtx base;
6781 #if TARGET_MACHO
6782 if (TARGET_MACHO && !TARGET_64BIT)
6784 if (reg == 0)
6785 reg = gen_reg_rtx (Pmode);
6786 /* Use the generic Mach-O PIC machinery. */
6787 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6789 #endif
6791 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6792 new = addr;
6793 else if (TARGET_64BIT
6794 && ix86_cmodel != CM_SMALL_PIC
6795 && local_symbolic_operand (addr, Pmode))
6797 rtx tmpreg;
6798 /* This symbol may be referenced via a displacement from the PIC
6799 base address (@GOTOFF). */
6801 if (reload_in_progress)
6802 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6803 if (GET_CODE (addr) == CONST)
6804 addr = XEXP (addr, 0);
6805 if (GET_CODE (addr) == PLUS)
6807 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6808 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6810 else
6811 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6812 new = gen_rtx_CONST (Pmode, new);
6813 if (!reg)
6814 tmpreg = gen_reg_rtx (Pmode);
6815 else
6816 tmpreg = reg;
6817 emit_move_insn (tmpreg, new);
6819 if (reg != 0)
6821 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6822 tmpreg, 1, OPTAB_DIRECT);
6823 new = reg;
6825 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6827 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6829 /* This symbol may be referenced via a displacement from the PIC
6830 base address (@GOTOFF). */
6832 if (reload_in_progress)
6833 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6834 if (GET_CODE (addr) == CONST)
6835 addr = XEXP (addr, 0);
6836 if (GET_CODE (addr) == PLUS)
6838 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6839 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6841 else
6842 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6843 new = gen_rtx_CONST (Pmode, new);
6844 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6846 if (reg != 0)
6848 emit_move_insn (reg, new);
6849 new = reg;
6852 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6854 if (TARGET_64BIT)
6856 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6857 new = gen_rtx_CONST (Pmode, new);
6858 new = gen_const_mem (Pmode, new);
6859 set_mem_alias_set (new, ix86_GOT_alias_set ());
6861 if (reg == 0)
6862 reg = gen_reg_rtx (Pmode);
6863 /* Use gen_movsi directly, otherwise the address is loaded
6864 into a register for CSE. We don't want to CSE these addresses;
6865 instead we CSE addresses from the GOT table, so skip this. */
6866 emit_insn (gen_movsi (reg, new));
6867 new = reg;
6869 else
6871 /* This symbol must be referenced via a load from the
6872 Global Offset Table (@GOT). */
6874 if (reload_in_progress)
6875 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6876 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6877 new = gen_rtx_CONST (Pmode, new);
6878 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6879 new = gen_const_mem (Pmode, new);
6880 set_mem_alias_set (new, ix86_GOT_alias_set ());
6882 if (reg == 0)
6883 reg = gen_reg_rtx (Pmode);
6884 emit_move_insn (reg, new);
6885 new = reg;
6888 else
6890 if (GET_CODE (addr) == CONST_INT
6891 && !x86_64_immediate_operand (addr, VOIDmode))
6893 if (reg)
6895 emit_move_insn (reg, addr);
6896 new = reg;
6898 else
6899 new = force_reg (Pmode, addr);
6901 else if (GET_CODE (addr) == CONST)
6903 addr = XEXP (addr, 0);
6905 /* We must match stuff we generated before. Assume the only
6906 unspecs that can get here are ours. Not that we could do
6907 anything with them anyway.... */
6908 if (GET_CODE (addr) == UNSPEC
6909 || (GET_CODE (addr) == PLUS
6910 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6911 return orig;
6912 gcc_assert (GET_CODE (addr) == PLUS);
6914 if (GET_CODE (addr) == PLUS)
6916 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6918 /* Check first to see if this is a constant offset from a @GOTOFF
6919 symbol reference. */
6920 if (local_symbolic_operand (op0, Pmode)
6921 && GET_CODE (op1) == CONST_INT)
6923 if (!TARGET_64BIT)
6925 if (reload_in_progress)
6926 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6927 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6928 UNSPEC_GOTOFF);
6929 new = gen_rtx_PLUS (Pmode, new, op1);
6930 new = gen_rtx_CONST (Pmode, new);
6931 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6933 if (reg != 0)
6935 emit_move_insn (reg, new);
6936 new = reg;
6939 else
6941 if (INTVAL (op1) < -16*1024*1024
6942 || INTVAL (op1) >= 16*1024*1024)
6944 if (!x86_64_immediate_operand (op1, Pmode))
6945 op1 = force_reg (Pmode, op1);
6946 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6950 else
6952 base = legitimize_pic_address (XEXP (addr, 0), reg);
6953 new = legitimize_pic_address (XEXP (addr, 1),
6954 base == reg ? NULL_RTX : reg);
6956 if (GET_CODE (new) == CONST_INT)
6957 new = plus_constant (base, INTVAL (new));
6958 else
6960 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6962 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6963 new = XEXP (new, 1);
6965 new = gen_rtx_PLUS (Pmode, base, new);
6970 return new;
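/* Illustrative sketch (not part of the original source, "foo" is a
   hypothetical symbol): on a 32-bit target, a local symbol is rewritten
   above into

       (plus (reg pic) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   which assembles to foo@GOTOFF(%ebx), whereas a preemptible global symbol
   becomes a load from its GOT slot,

       (mem (plus (reg pic) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))

   i.e. foo@GOT(%ebx), with the loaded value then used as the address.  */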
6973 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6975 static rtx
6976 get_thread_pointer (int to_reg)
6978 rtx tp, reg, insn;
6980 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6981 if (!to_reg)
6982 return tp;
6984 reg = gen_reg_rtx (Pmode);
6985 insn = gen_rtx_SET (VOIDmode, reg, tp);
6986 insn = emit_insn (insn);
6988 return reg;
6991 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6992 false if we expect this to be used for a memory address and true if
6993 we expect to load the address into a register. */
6995 static rtx
6996 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6998 rtx dest, base, off, pic, tp;
6999 int type;
7001 switch (model)
7003 case TLS_MODEL_GLOBAL_DYNAMIC:
7004 dest = gen_reg_rtx (Pmode);
7005 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7007 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7009 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7011 start_sequence ();
7012 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7013 insns = get_insns ();
7014 end_sequence ();
7016 emit_libcall_block (insns, dest, rax, x);
7018 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7019 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7020 else
7021 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7023 if (TARGET_GNU2_TLS)
7025 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7027 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7029 break;
7031 case TLS_MODEL_LOCAL_DYNAMIC:
7032 base = gen_reg_rtx (Pmode);
7033 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7035 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7037 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7039 start_sequence ();
7040 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7041 insns = get_insns ();
7042 end_sequence ();
7044 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7045 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7046 emit_libcall_block (insns, base, rax, note);
7048 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7049 emit_insn (gen_tls_local_dynamic_base_64 (base));
7050 else
7051 emit_insn (gen_tls_local_dynamic_base_32 (base));
7053 if (TARGET_GNU2_TLS)
7055 rtx x = ix86_tls_module_base ();
7057 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7058 gen_rtx_MINUS (Pmode, x, tp));
7061 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7062 off = gen_rtx_CONST (Pmode, off);
7064 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7066 if (TARGET_GNU2_TLS)
7068 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7070 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7073 break;
7075 case TLS_MODEL_INITIAL_EXEC:
7076 if (TARGET_64BIT)
7078 pic = NULL;
7079 type = UNSPEC_GOTNTPOFF;
7081 else if (flag_pic)
7083 if (reload_in_progress)
7084 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7085 pic = pic_offset_table_rtx;
7086 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7088 else if (!TARGET_ANY_GNU_TLS)
7090 pic = gen_reg_rtx (Pmode);
7091 emit_insn (gen_set_got (pic));
7092 type = UNSPEC_GOTTPOFF;
7094 else
7096 pic = NULL;
7097 type = UNSPEC_INDNTPOFF;
7100 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7101 off = gen_rtx_CONST (Pmode, off);
7102 if (pic)
7103 off = gen_rtx_PLUS (Pmode, pic, off);
7104 off = gen_const_mem (Pmode, off);
7105 set_mem_alias_set (off, ix86_GOT_alias_set ());
7107 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7109 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7110 off = force_reg (Pmode, off);
7111 return gen_rtx_PLUS (Pmode, base, off);
7113 else
7115 base = get_thread_pointer (true);
7116 dest = gen_reg_rtx (Pmode);
7117 emit_insn (gen_subsi3 (dest, base, off));
7119 break;
7121 case TLS_MODEL_LOCAL_EXEC:
7122 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7123 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7124 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7125 off = gen_rtx_CONST (Pmode, off);
7127 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7129 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7130 return gen_rtx_PLUS (Pmode, base, off);
7132 else
7134 base = get_thread_pointer (true);
7135 dest = gen_reg_rtx (Pmode);
7136 emit_insn (gen_subsi3 (dest, base, off));
7138 break;
7140 default:
7141 gcc_unreachable ();
7144 return dest;
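/* Editor's illustrative sketch, not part of the original source: at the C
   level the models handled above correspond to ordinary __thread accesses;
   which model the compiler picks depends on -fPIC, symbol visibility and
   optimization.  In the simplest (local-exec) case the variable is addressed
   as thread-pointer + constant offset (%fs on x86-64, %gs on ia32).
   "counter" below is a hypothetical variable used only for illustration.  */
#include <stdio.h>

static __thread int counter;

int
main (void)
{
  counter = 42;                 /* becomes a thread-pointer-relative store */
  printf ("%d\n", counter);
  return 0;
}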
7147 /* Try machine-dependent ways of modifying an illegitimate address
7148 to be legitimate. If we find one, return the new, valid address.
7149 This macro is used in only one place: `memory_address' in explow.c.
7151 OLDX is the address as it was before break_out_memory_refs was called.
7152 In some cases it is useful to look at this to decide what needs to be done.
7154 MODE and WIN are passed so that this macro can use
7155 GO_IF_LEGITIMATE_ADDRESS.
7157 It is always safe for this macro to do nothing. It exists to recognize
7158 opportunities to optimize the output.
7160 For the 80386, we handle X+REG by loading X into a register R and
7161 using R+REG. R will go in a general reg and indexing will be used.
7162 However, if REG is a broken-out memory address or multiplication,
7163 nothing needs to be done because REG can certainly go in a general reg.
7165 When -fpic is used, special handling is needed for symbolic references.
7166 See comments by legitimize_pic_address in i386.c for details. */
7169 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7171 int changed = 0;
7172 unsigned log;
7174 if (TARGET_DEBUG_ADDR)
7176 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7177 GET_MODE_NAME (mode));
7178 debug_rtx (x);
7181 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7182 if (log)
7183 return legitimize_tls_address (x, log, false);
7184 if (GET_CODE (x) == CONST
7185 && GET_CODE (XEXP (x, 0)) == PLUS
7186 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7187 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7189 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7190 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7193 if (flag_pic && SYMBOLIC_CONST (x))
7194 return legitimize_pic_address (x, 0);
7196 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7197 if (GET_CODE (x) == ASHIFT
7198 && GET_CODE (XEXP (x, 1)) == CONST_INT
7199 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7201 changed = 1;
7202 log = INTVAL (XEXP (x, 1));
7203 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7204 GEN_INT (1 << log));
7207 if (GET_CODE (x) == PLUS)
7209 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7211 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7212 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7213 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7215 changed = 1;
7216 log = INTVAL (XEXP (XEXP (x, 0), 1));
7217 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7218 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7219 GEN_INT (1 << log));
7222 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7223 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7224 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7226 changed = 1;
7227 log = INTVAL (XEXP (XEXP (x, 1), 1));
7228 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7229 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7230 GEN_INT (1 << log));
7233 /* Put multiply first if it isn't already. */
7234 if (GET_CODE (XEXP (x, 1)) == MULT)
7236 rtx tmp = XEXP (x, 0);
7237 XEXP (x, 0) = XEXP (x, 1);
7238 XEXP (x, 1) = tmp;
7239 changed = 1;
7242 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7243 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7244 created by virtual register instantiation, register elimination, and
7245 similar optimizations. */
7246 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7248 changed = 1;
7249 x = gen_rtx_PLUS (Pmode,
7250 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7251 XEXP (XEXP (x, 1), 0)),
7252 XEXP (XEXP (x, 1), 1));
7255 /* Canonicalize
7256 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7257 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7258 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7259 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7260 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7261 && CONSTANT_P (XEXP (x, 1)))
7263 rtx constant;
7264 rtx other = NULL_RTX;
7266 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7268 constant = XEXP (x, 1);
7269 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7271 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7273 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7274 other = XEXP (x, 1);
7276 else
7277 constant = 0;
7279 if (constant)
7281 changed = 1;
7282 x = gen_rtx_PLUS (Pmode,
7283 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7284 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7285 plus_constant (other, INTVAL (constant)));
7289 if (changed && legitimate_address_p (mode, x, FALSE))
7290 return x;
7292 if (GET_CODE (XEXP (x, 0)) == MULT)
7294 changed = 1;
7295 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7298 if (GET_CODE (XEXP (x, 1)) == MULT)
7300 changed = 1;
7301 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7304 if (changed
7305 && GET_CODE (XEXP (x, 1)) == REG
7306 && GET_CODE (XEXP (x, 0)) == REG)
7307 return x;
7309 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7311 changed = 1;
7312 x = legitimize_pic_address (x, 0);
7315 if (changed && legitimate_address_p (mode, x, FALSE))
7316 return x;
7318 if (GET_CODE (XEXP (x, 0)) == REG)
7320 rtx temp = gen_reg_rtx (Pmode);
7321 rtx val = force_operand (XEXP (x, 1), temp);
7322 if (val != temp)
7323 emit_move_insn (temp, val);
7325 XEXP (x, 1) = temp;
7326 return x;
7329 else if (GET_CODE (XEXP (x, 1)) == REG)
7331 rtx temp = gen_reg_rtx (Pmode);
7332 rtx val = force_operand (XEXP (x, 0), temp);
7333 if (val != temp)
7334 emit_move_insn (temp, val);
7336 XEXP (x, 0) = temp;
7337 return x;
7341 return x;
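/* Editor's illustrative sketch, not part of the original source: the
   canonicalizations above rely on the identity (x << n) == x * (1 << n) for
   n in 0..3, which is what lets a shifted index be re-expressed with the
   base + index*scale addressing mode.  */
#include <assert.h>

int
main (void)
{
  for (unsigned n = 0; n < 4; n++)
    for (unsigned long x = 0; x < 100; x++)
      assert ((x << n) == x * (1UL << n));
  return 0;
}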
7344 /* Print an integer constant expression in assembler syntax. Addition
7345 and subtraction are the only arithmetic that may appear in these
7346 expressions. FILE is the stdio stream to write to, X is the rtx, and
7347 CODE is the operand print code from the output string. */
7349 static void
7350 output_pic_addr_const (FILE *file, rtx x, int code)
7352 char buf[256];
7354 switch (GET_CODE (x))
7356 case PC:
7357 gcc_assert (flag_pic);
7358 putc ('.', file);
7359 break;
7361 case SYMBOL_REF:
7362 output_addr_const (file, x);
7363 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7364 fputs ("@PLT", file);
7365 break;
7367 case LABEL_REF:
7368 x = XEXP (x, 0);
7369 /* FALLTHRU */
7370 case CODE_LABEL:
7371 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7372 assemble_name (asm_out_file, buf);
7373 break;
7375 case CONST_INT:
7376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7377 break;
7379 case CONST:
7380 /* This used to output parentheses around the expression,
7381 but that does not work on the 386 (either ATT or BSD assembler). */
7382 output_pic_addr_const (file, XEXP (x, 0), code);
7383 break;
7385 case CONST_DOUBLE:
7386 if (GET_MODE (x) == VOIDmode)
7388 /* We can use %d if the number is <32 bits and positive. */
7389 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7390 fprintf (file, "0x%lx%08lx",
7391 (unsigned long) CONST_DOUBLE_HIGH (x),
7392 (unsigned long) CONST_DOUBLE_LOW (x));
7393 else
7394 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7396 else
7397 /* We can't handle floating point constants;
7398 PRINT_OPERAND must handle them. */
7399 output_operand_lossage ("floating constant misused");
7400 break;
7402 case PLUS:
7403 /* Some assemblers need integer constants to appear first. */
7404 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7406 output_pic_addr_const (file, XEXP (x, 0), code);
7407 putc ('+', file);
7408 output_pic_addr_const (file, XEXP (x, 1), code);
7410 else
7412 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7413 output_pic_addr_const (file, XEXP (x, 1), code);
7414 putc ('+', file);
7415 output_pic_addr_const (file, XEXP (x, 0), code);
7417 break;
7419 case MINUS:
7420 if (!TARGET_MACHO)
7421 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7422 output_pic_addr_const (file, XEXP (x, 0), code);
7423 putc ('-', file);
7424 output_pic_addr_const (file, XEXP (x, 1), code);
7425 if (!TARGET_MACHO)
7426 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7427 break;
7429 case UNSPEC:
7430 gcc_assert (XVECLEN (x, 0) == 1);
7431 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7432 switch (XINT (x, 1))
7434 case UNSPEC_GOT:
7435 fputs ("@GOT", file);
7436 break;
7437 case UNSPEC_GOTOFF:
7438 fputs ("@GOTOFF", file);
7439 break;
7440 case UNSPEC_GOTPCREL:
7441 fputs ("@GOTPCREL(%rip)", file);
7442 break;
7443 case UNSPEC_GOTTPOFF:
7444 /* FIXME: This might be @TPOFF in Sun ld too. */
7445 fputs ("@GOTTPOFF", file);
7446 break;
7447 case UNSPEC_TPOFF:
7448 fputs ("@TPOFF", file);
7449 break;
7450 case UNSPEC_NTPOFF:
7451 if (TARGET_64BIT)
7452 fputs ("@TPOFF", file);
7453 else
7454 fputs ("@NTPOFF", file);
7455 break;
7456 case UNSPEC_DTPOFF:
7457 fputs ("@DTPOFF", file);
7458 break;
7459 case UNSPEC_GOTNTPOFF:
7460 if (TARGET_64BIT)
7461 fputs ("@GOTTPOFF(%rip)", file);
7462 else
7463 fputs ("@GOTNTPOFF", file);
7464 break;
7465 case UNSPEC_INDNTPOFF:
7466 fputs ("@INDNTPOFF", file);
7467 break;
7468 default:
7469 output_operand_lossage ("invalid UNSPEC as operand");
7470 break;
7472 break;
7474 default:
7475 output_operand_lossage ("invalid expression as operand");
7479 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7480 We need to emit DTP-relative relocations. */
7482 static void
7483 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7485 fputs (ASM_LONG, file);
7486 output_addr_const (file, x);
7487 fputs ("@DTPOFF", file);
7488 switch (size)
7490 case 4:
7491 break;
7492 case 8:
7493 fputs (", 0", file);
7494 break;
7495 default:
7496 gcc_unreachable ();
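/* Editor's illustrative sketch, not part of the original source: assuming
   ASM_LONG expands to "\t.long\t", the function above emits
   ".long sym@DTPOFF" for SIZE == 4 and ".long sym@DTPOFF, 0" (a zero upper
   half) for SIZE == 8.  emit_dtprel below is a hypothetical stand-alone
   restatement of that formatting, not the real emitter.  */
#include <stdio.h>

static void
emit_dtprel (FILE *file, const char *sym, int size)
{
  fprintf (file, "\t.long\t%s@DTPOFF%s\n", sym, size == 8 ? ", 0" : "");
}

int
main (void)
{
  emit_dtprel (stdout, "foo", 4);
  emit_dtprel (stdout, "foo", 8);
  return 0;
}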
7500 /* In the name of slightly smaller debug output, and to cater to
7501 general assembler lossage, recognize PIC+GOTOFF and turn it back
7502 into a direct symbol reference.
7504 On Darwin, this is necessary to avoid a crash, because Darwin
7505 has a different PIC label for each routine but the DWARF debugging
7506 information is not associated with any particular routine, so it's
7507 necessary to remove references to the PIC label from RTL stored by
7508 the DWARF output code. */
7510 static rtx
7511 ix86_delegitimize_address (rtx orig_x)
7513 rtx x = orig_x;
7514 /* reg_addend is NULL or a multiple of some register. */
7515 rtx reg_addend = NULL_RTX;
7516 /* const_addend is NULL or a const_int. */
7517 rtx const_addend = NULL_RTX;
7518 /* This is the result, or NULL. */
7519 rtx result = NULL_RTX;
7521 if (GET_CODE (x) == MEM)
7522 x = XEXP (x, 0);
7524 if (TARGET_64BIT)
7526 if (GET_CODE (x) != CONST
7527 || GET_CODE (XEXP (x, 0)) != UNSPEC
7528 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7529 || GET_CODE (orig_x) != MEM)
7530 return orig_x;
7531 return XVECEXP (XEXP (x, 0), 0, 0);
7534 if (GET_CODE (x) != PLUS
7535 || GET_CODE (XEXP (x, 1)) != CONST)
7536 return orig_x;
7538 if (GET_CODE (XEXP (x, 0)) == REG
7539 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7540 /* %ebx + GOT/GOTOFF */
7542 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7544 /* %ebx + %reg * scale + GOT/GOTOFF */
7545 reg_addend = XEXP (x, 0);
7546 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7547 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7548 reg_addend = XEXP (reg_addend, 1);
7549 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7550 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7551 reg_addend = XEXP (reg_addend, 0);
7552 else
7553 return orig_x;
7554 if (GET_CODE (reg_addend) != REG
7555 && GET_CODE (reg_addend) != MULT
7556 && GET_CODE (reg_addend) != ASHIFT)
7557 return orig_x;
7559 else
7560 return orig_x;
7562 x = XEXP (XEXP (x, 1), 0);
7563 if (GET_CODE (x) == PLUS
7564 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7566 const_addend = XEXP (x, 1);
7567 x = XEXP (x, 0);
7570 if (GET_CODE (x) == UNSPEC
7571 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7572 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7573 result = XVECEXP (x, 0, 0);
7575 if (TARGET_MACHO && darwin_local_data_pic (x)
7576 && GET_CODE (orig_x) != MEM)
7577 result = XEXP (x, 0);
7579 if (! result)
7580 return orig_x;
7582 if (const_addend)
7583 result = gen_rtx_PLUS (Pmode, result, const_addend);
7584 if (reg_addend)
7585 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7586 return result;
7589 static void
7590 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7591 int fp, FILE *file)
7593 const char *suffix;
7595 if (mode == CCFPmode || mode == CCFPUmode)
7597 enum rtx_code second_code, bypass_code;
7598 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7599 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7600 code = ix86_fp_compare_code_to_integer (code);
7601 mode = CCmode;
7603 if (reverse)
7604 code = reverse_condition (code);
7606 switch (code)
7608 case EQ:
7609 suffix = "e";
7610 break;
7611 case NE:
7612 suffix = "ne";
7613 break;
7614 case GT:
7615 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7616 suffix = "g";
7617 break;
7618 case GTU:
7619 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7620 Those same assemblers have the same but opposite lossage on cmov. */
7621 gcc_assert (mode == CCmode);
7622 suffix = fp ? "nbe" : "a";
7623 break;
7624 case LT:
7625 switch (mode)
7627 case CCNOmode:
7628 case CCGOCmode:
7629 suffix = "s";
7630 break;
7632 case CCmode:
7633 case CCGCmode:
7634 suffix = "l";
7635 break;
7637 default:
7638 gcc_unreachable ();
7640 break;
7641 case LTU:
7642 gcc_assert (mode == CCmode);
7643 suffix = "b";
7644 break;
7645 case GE:
7646 switch (mode)
7648 case CCNOmode:
7649 case CCGOCmode:
7650 suffix = "ns";
7651 break;
7653 case CCmode:
7654 case CCGCmode:
7655 suffix = "ge";
7656 break;
7658 default:
7659 gcc_unreachable ();
7661 break;
7662 case GEU:
7663 /* ??? As above. */
7664 gcc_assert (mode == CCmode);
7665 suffix = fp ? "nb" : "ae";
7666 break;
7667 case LE:
7668 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7669 suffix = "le";
7670 break;
7671 case LEU:
7672 gcc_assert (mode == CCmode);
7673 suffix = "be";
7674 break;
7675 case UNORDERED:
7676 suffix = fp ? "u" : "p";
7677 break;
7678 case ORDERED:
7679 suffix = fp ? "nu" : "np";
7680 break;
7681 default:
7682 gcc_unreachable ();
7684 fputs (suffix, file);
7687 /* Print the name of register X to FILE based on its machine mode and number.
7688 If CODE is 'w', pretend the mode is HImode.
7689 If CODE is 'b', pretend the mode is QImode.
7690 If CODE is 'k', pretend the mode is SImode.
7691 If CODE is 'q', pretend the mode is DImode.
7692 If CODE is 'h', pretend the reg is the 'high' byte register.
7693 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack register.  */
7695 void
7696 print_reg (rtx x, int code, FILE *file)
7698 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7699 && REGNO (x) != FRAME_POINTER_REGNUM
7700 && REGNO (x) != FLAGS_REG
7701 && REGNO (x) != FPSR_REG
7702 && REGNO (x) != FPCR_REG);
7704 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7705 putc ('%', file);
7707 if (code == 'w' || MMX_REG_P (x))
7708 code = 2;
7709 else if (code == 'b')
7710 code = 1;
7711 else if (code == 'k')
7712 code = 4;
7713 else if (code == 'q')
7714 code = 8;
7715 else if (code == 'y')
7716 code = 3;
7717 else if (code == 'h')
7718 code = 0;
7719 else
7720 code = GET_MODE_SIZE (GET_MODE (x));
7722 /* Irritatingly, the AMD extended registers use a different naming
7723 convention from the normal registers.  */
7724 if (REX_INT_REG_P (x))
7726 gcc_assert (TARGET_64BIT);
7727 switch (code)
7729 case 0:
7730 error ("extended registers have no high halves");
7731 break;
7732 case 1:
7733 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7734 break;
7735 case 2:
7736 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7737 break;
7738 case 4:
7739 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7740 break;
7741 case 8:
7742 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7743 break;
7744 default:
7745 error ("unsupported operand size for extended register");
7746 break;
7748 return;
7750 switch (code)
7752 case 3:
7753 if (STACK_TOP_P (x))
7755 fputs ("st(0)", file);
7756 break;
7758 /* FALLTHRU */
7759 case 8:
7760 case 4:
7761 case 12:
7762 if (! ANY_FP_REG_P (x))
7763 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7764 /* FALLTHRU */
7765 case 16:
7766 case 2:
7767 normal:
7768 fputs (hi_reg_name[REGNO (x)], file);
7769 break;
7770 case 1:
7771 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7772 goto normal;
7773 fputs (qi_reg_name[REGNO (x)], file);
7774 break;
7775 case 0:
7776 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7777 goto normal;
7778 fputs (qi_high_reg_name[REGNO (x)], file);
7779 break;
7780 default:
7781 gcc_unreachable ();
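/* Editor's illustrative sketch, not part of the original source: the
   extended-register branch above yields names of the form r8..r15 with a
   size suffix (b/w/d or none), unlike the al/ax/eax/rax scheme used for the
   legacy registers.  rex_reg_name is a hypothetical helper.  */
#include <stdio.h>

static void
rex_reg_name (char *buf, int n /* 8..15 */, int size /* 1, 2, 4 or 8 */)
{
  const char *suffix = size == 1 ? "b" : size == 2 ? "w" : size == 4 ? "d" : "";
  sprintf (buf, "r%d%s", n, suffix);
}

int
main (void)
{
  char buf[8];
  rex_reg_name (buf, 10, 4);
  puts (buf);                   /* prints "r10d" */
  return 0;
}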
7785 /* Locate some local-dynamic symbol still in use by this function
7786 so that we can print its name in some tls_local_dynamic_base
7787 pattern. */
7789 static const char *
7790 get_some_local_dynamic_name (void)
7792 rtx insn;
7794 if (cfun->machine->some_ld_name)
7795 return cfun->machine->some_ld_name;
7797 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7798 if (INSN_P (insn)
7799 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7800 return cfun->machine->some_ld_name;
7802 gcc_unreachable ();
7805 static int
7806 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7808 rtx x = *px;
7810 if (GET_CODE (x) == SYMBOL_REF
7811 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7813 cfun->machine->some_ld_name = XSTR (x, 0);
7814 return 1;
7817 return 0;
7820 /* Meaning of CODE:
7821 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7822 C -- print opcode suffix for set/cmov insn.
7823 c -- like C, but print reversed condition
7824 F,f -- likewise, but for floating-point.
7825 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7826 otherwise nothing
7827 R -- print the prefix for register names.
7828 z -- print the opcode suffix for the size of the current operand.
7829 * -- print a star (in certain assembler syntax)
7830 A -- print an absolute memory reference.
7831 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7832 s -- print a shift double count, followed by the assembler's argument
7833 delimiter.
7834 b -- print the QImode name of the register for the indicated operand.
7835 %b0 would print %al if operands[0] is reg 0.
7836 w -- likewise, print the HImode name of the register.
7837 k -- likewise, print the SImode name of the register.
7838 q -- likewise, print the DImode name of the register.
7839 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7840 y -- print "st(0)" instead of "st" as a register.
7841 D -- print condition for SSE cmp instruction.
7842 P -- if PIC, print an @PLT suffix.
7843 X -- don't print any sort of PIC '@' suffix for a symbol.
7844 & -- print some in-use local-dynamic symbol name.
7845 H -- print a memory address offset by 8; used for sse high-parts
7848 void
7849 print_operand (FILE *file, rtx x, int code)
7851 if (code)
7853 switch (code)
7855 case '*':
7856 if (ASSEMBLER_DIALECT == ASM_ATT)
7857 putc ('*', file);
7858 return;
7860 case '&':
7861 assemble_name (file, get_some_local_dynamic_name ());
7862 return;
7864 case 'A':
7865 switch (ASSEMBLER_DIALECT)
7867 case ASM_ATT:
7868 putc ('*', file);
7869 break;
7871 case ASM_INTEL:
7872 /* Intel syntax.  For absolute addresses, registers should not
7873 be surrounded by brackets.  */
7874 if (GET_CODE (x) != REG)
7876 putc ('[', file);
7877 PRINT_OPERAND (file, x, 0);
7878 putc (']', file);
7879 return;
7881 break;
7883 default:
7884 gcc_unreachable ();
7887 PRINT_OPERAND (file, x, 0);
7888 return;
7891 case 'L':
7892 if (ASSEMBLER_DIALECT == ASM_ATT)
7893 putc ('l', file);
7894 return;
7896 case 'W':
7897 if (ASSEMBLER_DIALECT == ASM_ATT)
7898 putc ('w', file);
7899 return;
7901 case 'B':
7902 if (ASSEMBLER_DIALECT == ASM_ATT)
7903 putc ('b', file);
7904 return;
7906 case 'Q':
7907 if (ASSEMBLER_DIALECT == ASM_ATT)
7908 putc ('l', file);
7909 return;
7911 case 'S':
7912 if (ASSEMBLER_DIALECT == ASM_ATT)
7913 putc ('s', file);
7914 return;
7916 case 'T':
7917 if (ASSEMBLER_DIALECT == ASM_ATT)
7918 putc ('t', file);
7919 return;
7921 case 'z':
7922 /* 387 opcodes don't get size suffixes if the operands are
7923 registers. */
7924 if (STACK_REG_P (x))
7925 return;
7927 /* Likewise if using Intel opcodes. */
7928 if (ASSEMBLER_DIALECT == ASM_INTEL)
7929 return;
7931 /* Deduce the opcode suffix from the size of the operand.  */
7932 switch (GET_MODE_SIZE (GET_MODE (x)))
7934 case 2:
7935 #ifdef HAVE_GAS_FILDS_FISTS
7936 putc ('s', file);
7937 #endif
7938 return;
7940 case 4:
7941 if (GET_MODE (x) == SFmode)
7943 putc ('s', file);
7944 return;
7946 else
7947 putc ('l', file);
7948 return;
7950 case 12:
7951 case 16:
7952 putc ('t', file);
7953 return;
7955 case 8:
7956 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7958 #ifdef GAS_MNEMONICS
7959 putc ('q', file);
7960 #else
7961 putc ('l', file);
7962 putc ('l', file);
7963 #endif
7965 else
7966 putc ('l', file);
7967 return;
7969 default:
7970 gcc_unreachable ();
7973 case 'b':
7974 case 'w':
7975 case 'k':
7976 case 'q':
7977 case 'h':
7978 case 'y':
7979 case 'X':
7980 case 'P':
7981 break;
7983 case 's':
7984 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7986 PRINT_OPERAND (file, x, 0);
7987 putc (',', file);
7989 return;
7991 case 'D':
7992 /* Little bit of braindamage here.  The SSE compare instructions
7993 use completely different names for the comparisons than the
7994 fp conditional moves do.  */
7995 switch (GET_CODE (x))
7997 case EQ:
7998 case UNEQ:
7999 fputs ("eq", file);
8000 break;
8001 case LT:
8002 case UNLT:
8003 fputs ("lt", file);
8004 break;
8005 case LE:
8006 case UNLE:
8007 fputs ("le", file);
8008 break;
8009 case UNORDERED:
8010 fputs ("unord", file);
8011 break;
8012 case NE:
8013 case LTGT:
8014 fputs ("neq", file);
8015 break;
8016 case UNGE:
8017 case GE:
8018 fputs ("nlt", file);
8019 break;
8020 case UNGT:
8021 case GT:
8022 fputs ("nle", file);
8023 break;
8024 case ORDERED:
8025 fputs ("ord", file);
8026 break;
8027 default:
8028 gcc_unreachable ();
8030 return;
8031 case 'O':
8032 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8033 if (ASSEMBLER_DIALECT == ASM_ATT)
8035 switch (GET_MODE (x))
8037 case HImode: putc ('w', file); break;
8038 case SImode:
8039 case SFmode: putc ('l', file); break;
8040 case DImode:
8041 case DFmode: putc ('q', file); break;
8042 default: gcc_unreachable ();
8044 putc ('.', file);
8046 #endif
8047 return;
8048 case 'C':
8049 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8050 return;
8051 case 'F':
8052 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8053 if (ASSEMBLER_DIALECT == ASM_ATT)
8054 putc ('.', file);
8055 #endif
8056 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8057 return;
8059 /* Like above, but reverse condition */
8060 case 'c':
8061 /* Check to see if argument to %c is really a constant
8062 and not a condition code which needs to be reversed. */
8063 if (!COMPARISON_P (x))
8065 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8066 return;
8068 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8069 return;
8070 case 'f':
8071 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8072 if (ASSEMBLER_DIALECT == ASM_ATT)
8073 putc ('.', file);
8074 #endif
8075 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8076 return;
8078 case 'H':
8079 /* It doesn't actually matter what mode we use here, as we're
8080 only going to use this for printing. */
8081 x = adjust_address_nv (x, DImode, 8);
8082 break;
8084 case '+':
8086 rtx x;
8088 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8089 return;
8091 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8092 if (x)
8094 int pred_val = INTVAL (XEXP (x, 0));
8096 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8097 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8099 int taken = pred_val > REG_BR_PROB_BASE / 2;
8100 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8102 /* Emit hints only in the case where the default branch prediction
8103 heuristics would fail.  */
8104 if (taken != cputaken)
8106 /* We use 3e (DS) prefix for taken branches and
8107 2e (CS) prefix for not taken branches. */
8108 if (taken)
8109 fputs ("ds ; ", file);
8110 else
8111 fputs ("cs ; ", file);
8115 return;
8117 default:
8118 output_operand_lossage ("invalid operand code '%c'", code);
8122 if (GET_CODE (x) == REG)
8123 print_reg (x, code, file);
8125 else if (GET_CODE (x) == MEM)
8127 /* No `byte ptr' prefix for call instructions. */
8128 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8130 const char * size;
8131 switch (GET_MODE_SIZE (GET_MODE (x)))
8133 case 1: size = "BYTE"; break;
8134 case 2: size = "WORD"; break;
8135 case 4: size = "DWORD"; break;
8136 case 8: size = "QWORD"; break;
8137 case 12: size = "XWORD"; break;
8138 case 16: size = "XMMWORD"; break;
8139 default:
8140 gcc_unreachable ();
8143 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8144 if (code == 'b')
8145 size = "BYTE";
8146 else if (code == 'w')
8147 size = "WORD";
8148 else if (code == 'k')
8149 size = "DWORD";
8151 fputs (size, file);
8152 fputs (" PTR ", file);
8155 x = XEXP (x, 0);
8156 /* Avoid (%rip) for call operands. */
8157 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8158 && GET_CODE (x) != CONST_INT)
8159 output_addr_const (file, x);
8160 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8161 output_operand_lossage ("invalid constraints for operand");
8162 else
8163 output_address (x);
8166 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8168 REAL_VALUE_TYPE r;
8169 long l;
8171 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8172 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8174 if (ASSEMBLER_DIALECT == ASM_ATT)
8175 putc ('$', file);
8176 fprintf (file, "0x%08lx", l);
8179 /* These float cases don't actually occur as immediate operands. */
8180 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8182 char dstr[30];
8184 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8185 fprintf (file, "%s", dstr);
8188 else if (GET_CODE (x) == CONST_DOUBLE
8189 && GET_MODE (x) == XFmode)
8191 char dstr[30];
8193 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8194 fprintf (file, "%s", dstr);
8197 else
8199 /* We have patterns that allow zero sets of memory, for instance.
8200 In 64-bit mode, we should probably support all 8-byte vectors,
8201 since we can in fact encode that into an immediate. */
8202 if (GET_CODE (x) == CONST_VECTOR)
8204 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8205 x = const0_rtx;
8208 if (code != 'P')
8210 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8212 if (ASSEMBLER_DIALECT == ASM_ATT)
8213 putc ('$', file);
8215 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8216 || GET_CODE (x) == LABEL_REF)
8218 if (ASSEMBLER_DIALECT == ASM_ATT)
8219 putc ('$', file);
8220 else
8221 fputs ("OFFSET FLAT:", file);
8224 if (GET_CODE (x) == CONST_INT)
8225 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8226 else if (flag_pic)
8227 output_pic_addr_const (file, x, code);
8228 else
8229 output_addr_const (file, x);
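/* Editor's illustrative sketch, not part of the original source: the '+'
   case above adds a segment-override prefix as a branch hint only when the
   profile-based prediction is strong enough and disagrees with the CPU's
   static rule (forward branches predicted not taken).  branch_hint and
   BR_PROB_BASE are hypothetical stand-ins for the real logic and macro.  */
#include <stdio.h>

#define BR_PROB_BASE 10000

static const char *
branch_hint (int pred_val, int forward_branch)
{
  if (pred_val >= BR_PROB_BASE * 45 / 100
      && pred_val <= BR_PROB_BASE * 55 / 100)
    return "";                          /* prediction too weak for a hint */
  int taken = pred_val > BR_PROB_BASE / 2;
  int cputaken = !forward_branch;       /* static rule: backward => taken */
  if (taken == cputaken)
    return "";                          /* default heuristic already right */
  return taken ? "ds ; " : "cs ; ";     /* 0x3e taken, 0x2e not taken */
}

int
main (void)
{
  printf ("\"%s\"\n", branch_hint (9000, 1));   /* forward, strongly taken */
  return 0;
}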
8233 /* Print a memory operand whose address is ADDR. */
8235 void
8236 print_operand_address (FILE *file, rtx addr)
8238 struct ix86_address parts;
8239 rtx base, index, disp;
8240 int scale;
8241 int ok = ix86_decompose_address (addr, &parts);
8243 gcc_assert (ok);
8245 base = parts.base;
8246 index = parts.index;
8247 disp = parts.disp;
8248 scale = parts.scale;
8250 switch (parts.seg)
8252 case SEG_DEFAULT:
8253 break;
8254 case SEG_FS:
8255 case SEG_GS:
8256 if (USER_LABEL_PREFIX[0] == 0)
8257 putc ('%', file);
8258 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8259 break;
8260 default:
8261 gcc_unreachable ();
8264 if (!base && !index)
8266 /* Displacement only requires special attention. */
8268 if (GET_CODE (disp) == CONST_INT)
8270 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8272 if (USER_LABEL_PREFIX[0] == 0)
8273 putc ('%', file);
8274 fputs ("ds:", file);
8276 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8278 else if (flag_pic)
8279 output_pic_addr_const (file, disp, 0);
8280 else
8281 output_addr_const (file, disp);
8283 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
8284 if (TARGET_64BIT)
8286 if (GET_CODE (disp) == CONST
8287 && GET_CODE (XEXP (disp, 0)) == PLUS
8288 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8289 disp = XEXP (XEXP (disp, 0), 0);
8290 if (GET_CODE (disp) == LABEL_REF
8291 || (GET_CODE (disp) == SYMBOL_REF
8292 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8293 fputs ("(%rip)", file);
8296 else
8298 if (ASSEMBLER_DIALECT == ASM_ATT)
8300 if (disp)
8302 if (flag_pic)
8303 output_pic_addr_const (file, disp, 0);
8304 else if (GET_CODE (disp) == LABEL_REF)
8305 output_asm_label (disp);
8306 else
8307 output_addr_const (file, disp);
8310 putc ('(', file);
8311 if (base)
8312 print_reg (base, 0, file);
8313 if (index)
8315 putc (',', file);
8316 print_reg (index, 0, file);
8317 if (scale != 1)
8318 fprintf (file, ",%d", scale);
8320 putc (')', file);
8322 else
8324 rtx offset = NULL_RTX;
8326 if (disp)
8328 /* Pull out the offset of a symbol; print any symbol itself. */
8329 if (GET_CODE (disp) == CONST
8330 && GET_CODE (XEXP (disp, 0)) == PLUS
8331 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8333 offset = XEXP (XEXP (disp, 0), 1);
8334 disp = gen_rtx_CONST (VOIDmode,
8335 XEXP (XEXP (disp, 0), 0));
8338 if (flag_pic)
8339 output_pic_addr_const (file, disp, 0);
8340 else if (GET_CODE (disp) == LABEL_REF)
8341 output_asm_label (disp);
8342 else if (GET_CODE (disp) == CONST_INT)
8343 offset = disp;
8344 else
8345 output_addr_const (file, disp);
8348 putc ('[', file);
8349 if (base)
8351 print_reg (base, 0, file);
8352 if (offset)
8354 if (INTVAL (offset) >= 0)
8355 putc ('+', file);
8356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8359 else if (offset)
8360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8361 else
8362 putc ('0', file);
8364 if (index)
8366 putc ('+', file);
8367 print_reg (index, 0, file);
8368 if (scale != 1)
8369 fprintf (file, "*%d", scale);
8371 putc (']', file);
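/* Editor's illustrative sketch, not part of the original source: the two
   dialects handled above print the same address either as AT&T
   "disp(%base,%index,scale)" or as Intel "[base+index*scale+disp]".
   print_addr is a hypothetical formatter, not the real emitter.  */
#include <stdio.h>

static void
print_addr (FILE *file, int att, const char *base, const char *index,
            int scale, long disp)
{
  if (att)
    fprintf (file, "%ld(%%%s,%%%s,%d)", disp, base, index, scale);
  else
    fprintf (file, "[%s+%s*%d%+ld]", base, index, scale, disp);
}

int
main (void)
{
  print_addr (stdout, 1, "ebx", "ecx", 4, 16);   /* 16(%ebx,%ecx,4) */
  putchar ('\n');
  print_addr (stdout, 0, "ebx", "ecx", 4, 16);   /* [ebx+ecx*4+16] */
  putchar ('\n');
  return 0;
}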
8376 bool
8377 output_addr_const_extra (FILE *file, rtx x)
8379 rtx op;
8381 if (GET_CODE (x) != UNSPEC)
8382 return false;
8384 op = XVECEXP (x, 0, 0);
8385 switch (XINT (x, 1))
8387 case UNSPEC_GOTTPOFF:
8388 output_addr_const (file, op);
8389 /* FIXME: This might be @TPOFF in Sun ld. */
8390 fputs ("@GOTTPOFF", file);
8391 break;
8392 case UNSPEC_TPOFF:
8393 output_addr_const (file, op);
8394 fputs ("@TPOFF", file);
8395 break;
8396 case UNSPEC_NTPOFF:
8397 output_addr_const (file, op);
8398 if (TARGET_64BIT)
8399 fputs ("@TPOFF", file);
8400 else
8401 fputs ("@NTPOFF", file);
8402 break;
8403 case UNSPEC_DTPOFF:
8404 output_addr_const (file, op);
8405 fputs ("@DTPOFF", file);
8406 break;
8407 case UNSPEC_GOTNTPOFF:
8408 output_addr_const (file, op);
8409 if (TARGET_64BIT)
8410 fputs ("@GOTTPOFF(%rip)", file);
8411 else
8412 fputs ("@GOTNTPOFF", file);
8413 break;
8414 case UNSPEC_INDNTPOFF:
8415 output_addr_const (file, op);
8416 fputs ("@INDNTPOFF", file);
8417 break;
8419 default:
8420 return false;
8423 return true;
8426 /* Split one or more DImode RTL references into pairs of SImode
8427 references. The RTL can be REG, offsettable MEM, integer constant, or
8428 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8429 split and "num" is its length. lo_half and hi_half are output arrays
8430 that parallel "operands". */
8432 void
8433 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8435 while (num--)
8437 rtx op = operands[num];
8439 /* simplify_subreg refuses to split volatile memory addresses,
8440 but we still have to handle them.  */
8441 if (GET_CODE (op) == MEM)
8443 lo_half[num] = adjust_address (op, SImode, 0);
8444 hi_half[num] = adjust_address (op, SImode, 4);
8446 else
8448 lo_half[num] = simplify_gen_subreg (SImode, op,
8449 GET_MODE (op) == VOIDmode
8450 ? DImode : GET_MODE (op), 0);
8451 hi_half[num] = simplify_gen_subreg (SImode, op,
8452 GET_MODE (op) == VOIDmode
8453 ? DImode : GET_MODE (op), 4);
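/* Editor's illustrative sketch, not part of the original source: on this
   little-endian target the low SImode half of a DImode value lives at byte
   offset 0 and the high half at offset 4, which is what the adjust_address
   offsets above express for memory operands.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t value = 0x1122334455667788ULL;
  uint32_t lo = (uint32_t) value;           /* bytes 0..3 */
  uint32_t hi = (uint32_t) (value >> 32);   /* bytes 4..7 */
  printf ("lo = 0x%08x, hi = 0x%08x\n", (unsigned) lo, (unsigned) hi);
  return 0;
}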
8457 /* Split one or more TImode RTL references into pairs of DImode
8458 references. The RTL can be REG, offsettable MEM, integer constant, or
8459 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8460 split and "num" is its length. lo_half and hi_half are output arrays
8461 that parallel "operands". */
8463 void
8464 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8466 while (num--)
8468 rtx op = operands[num];
8470 /* simplify_subreg refuses to split volatile memory addresses, but we
8471 still have to handle them.  */
8472 if (GET_CODE (op) == MEM)
8474 lo_half[num] = adjust_address (op, DImode, 0);
8475 hi_half[num] = adjust_address (op, DImode, 8);
8477 else
8479 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8480 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8485 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8486 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8487 is the expression of the binary operation. The output may either be
8488 emitted here, or returned to the caller, like all output_* functions.
8490 There is no guarantee that the operands are the same mode, as they
8491 might be within FLOAT or FLOAT_EXTEND expressions. */
8493 #ifndef SYSV386_COMPAT
8494 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8495 wants to fix the assemblers because that causes incompatibility
8496 with gcc. No-one wants to fix gcc because that causes
8497 incompatibility with assemblers... You can use the option of
8498 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8499 #define SYSV386_COMPAT 1
8500 #endif
8502 const char *
8503 output_387_binary_op (rtx insn, rtx *operands)
8505 static char buf[30];
8506 const char *p;
8507 const char *ssep;
8508 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8510 #ifdef ENABLE_CHECKING
8511 /* Even if we do not want to check the inputs, this documents the input
8512 constraints, which helps in understanding the following code.  */
8513 if (STACK_REG_P (operands[0])
8514 && ((REG_P (operands[1])
8515 && REGNO (operands[0]) == REGNO (operands[1])
8516 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8517 || (REG_P (operands[2])
8518 && REGNO (operands[0]) == REGNO (operands[2])
8519 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8520 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8521 ; /* ok */
8522 else
8523 gcc_assert (is_sse);
8524 #endif
8526 switch (GET_CODE (operands[3]))
8528 case PLUS:
8529 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8530 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8531 p = "fiadd";
8532 else
8533 p = "fadd";
8534 ssep = "add";
8535 break;
8537 case MINUS:
8538 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8539 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8540 p = "fisub";
8541 else
8542 p = "fsub";
8543 ssep = "sub";
8544 break;
8546 case MULT:
8547 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8548 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8549 p = "fimul";
8550 else
8551 p = "fmul";
8552 ssep = "mul";
8553 break;
8555 case DIV:
8556 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8557 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8558 p = "fidiv";
8559 else
8560 p = "fdiv";
8561 ssep = "div";
8562 break;
8564 default:
8565 gcc_unreachable ();
8568 if (is_sse)
8570 strcpy (buf, ssep);
8571 if (GET_MODE (operands[0]) == SFmode)
8572 strcat (buf, "ss\t{%2, %0|%0, %2}");
8573 else
8574 strcat (buf, "sd\t{%2, %0|%0, %2}");
8575 return buf;
8577 strcpy (buf, p);
8579 switch (GET_CODE (operands[3]))
8581 case MULT:
8582 case PLUS:
8583 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8585 rtx temp = operands[2];
8586 operands[2] = operands[1];
8587 operands[1] = temp;
8590 /* We know that operands[0] == operands[1].  */
8592 if (GET_CODE (operands[2]) == MEM)
8594 p = "%z2\t%2";
8595 break;
8598 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8600 if (STACK_TOP_P (operands[0]))
8601 /* How is it that we are storing to a dead operand[2]?
8602 Well, presumably operands[1] is dead too. We can't
8603 store the result to st(0) as st(0) gets popped on this
8604 instruction. Instead store to operands[2] (which I
8605 think has to be st(1)). st(1) will be popped later.
8606 gcc <= 2.8.1 didn't have this check and generated
8607 assembly code that the Unixware assembler rejected. */
8608 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8609 else
8610 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8611 break;
8614 if (STACK_TOP_P (operands[0]))
8615 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8616 else
8617 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8618 break;
8620 case MINUS:
8621 case DIV:
8622 if (GET_CODE (operands[1]) == MEM)
8624 p = "r%z1\t%1";
8625 break;
8628 if (GET_CODE (operands[2]) == MEM)
8630 p = "%z2\t%2";
8631 break;
8634 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8636 #if SYSV386_COMPAT
8637 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8638 derived assemblers, confusingly reverse the direction of
8639 the operation for fsub{r} and fdiv{r} when the
8640 destination register is not st(0). The Intel assembler
8641 doesn't have this brain damage. Read !SYSV386_COMPAT to
8642 figure out what the hardware really does. */
8643 if (STACK_TOP_P (operands[0]))
8644 p = "{p\t%0, %2|rp\t%2, %0}";
8645 else
8646 p = "{rp\t%2, %0|p\t%0, %2}";
8647 #else
8648 if (STACK_TOP_P (operands[0]))
8649 /* As above for fmul/fadd, we can't store to st(0). */
8650 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8651 else
8652 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8653 #endif
8654 break;
8657 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8659 #if SYSV386_COMPAT
8660 if (STACK_TOP_P (operands[0]))
8661 p = "{rp\t%0, %1|p\t%1, %0}";
8662 else
8663 p = "{p\t%1, %0|rp\t%0, %1}";
8664 #else
8665 if (STACK_TOP_P (operands[0]))
8666 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8667 else
8668 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8669 #endif
8670 break;
8673 if (STACK_TOP_P (operands[0]))
8675 if (STACK_TOP_P (operands[1]))
8676 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8677 else
8678 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8679 break;
8681 else if (STACK_TOP_P (operands[1]))
8683 #if SYSV386_COMPAT
8684 p = "{\t%1, %0|r\t%0, %1}";
8685 #else
8686 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8687 #endif
8689 else
8691 #if SYSV386_COMPAT
8692 p = "{r\t%2, %0|\t%0, %2}";
8693 #else
8694 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8695 #endif
8697 break;
8699 default:
8700 gcc_unreachable ();
8703 strcat (buf, p);
8704 return buf;
8707 /* Return needed mode for entity in optimize_mode_switching pass. */
8710 ix86_mode_needed (int entity, rtx insn)
8712 enum attr_i387_cw mode;
8714 /* The mode UNINITIALIZED is used to store the control word after a
8715 function call or ASM pattern.  The mode ANY specifies that the function
8716 has no requirements on the control word and makes no changes to the
8717 bits we are interested in.  */
8719 if (CALL_P (insn)
8720 || (NONJUMP_INSN_P (insn)
8721 && (asm_noperands (PATTERN (insn)) >= 0
8722 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8723 return I387_CW_UNINITIALIZED;
8725 if (recog_memoized (insn) < 0)
8726 return I387_CW_ANY;
8728 mode = get_attr_i387_cw (insn);
8730 switch (entity)
8732 case I387_TRUNC:
8733 if (mode == I387_CW_TRUNC)
8734 return mode;
8735 break;
8737 case I387_FLOOR:
8738 if (mode == I387_CW_FLOOR)
8739 return mode;
8740 break;
8742 case I387_CEIL:
8743 if (mode == I387_CW_CEIL)
8744 return mode;
8745 break;
8747 case I387_MASK_PM:
8748 if (mode == I387_CW_MASK_PM)
8749 return mode;
8750 break;
8752 default:
8753 gcc_unreachable ();
8756 return I387_CW_ANY;
8759 /* Output code to initialize the control word copies used by the trunc?f?i
8760 and rounding patterns.  MODE selects which rounding-mode copy (truncate,
8761 floor, ceil or precision-mask) is initialized.  */
8763 void
8764 emit_i387_cw_initialization (int mode)
8766 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8767 rtx new_mode;
8769 int slot;
8771 rtx reg = gen_reg_rtx (HImode);
8773 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8774 emit_move_insn (reg, copy_rtx (stored_mode));
8776 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8778 switch (mode)
8780 case I387_CW_TRUNC:
8781 /* round toward zero (truncate) */
8782 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8783 slot = SLOT_CW_TRUNC;
8784 break;
8786 case I387_CW_FLOOR:
8787 /* round down toward -oo */
8788 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8789 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8790 slot = SLOT_CW_FLOOR;
8791 break;
8793 case I387_CW_CEIL:
8794 /* round up toward +oo */
8795 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8796 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8797 slot = SLOT_CW_CEIL;
8798 break;
8800 case I387_CW_MASK_PM:
8801 /* mask precision exception for nearbyint() */
8802 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8803 slot = SLOT_CW_MASK_PM;
8804 break;
8806 default:
8807 gcc_unreachable ();
8810 else
8812 switch (mode)
8814 case I387_CW_TRUNC:
8815 /* round toward zero (truncate) */
8816 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8817 slot = SLOT_CW_TRUNC;
8818 break;
8820 case I387_CW_FLOOR:
8821 /* round down toward -oo */
8822 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8823 slot = SLOT_CW_FLOOR;
8824 break;
8826 case I387_CW_CEIL:
8827 /* round up toward +oo */
8828 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8829 slot = SLOT_CW_CEIL;
8830 break;
8832 case I387_CW_MASK_PM:
8833 /* mask precision exception for nearbyint() */
8834 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8835 slot = SLOT_CW_MASK_PM;
8836 break;
8838 default:
8839 gcc_unreachable ();
8843 gcc_assert (slot < MAX_386_STACK_LOCALS);
8845 new_mode = assign_386_stack_local (HImode, slot);
8846 emit_move_insn (new_mode, reg);
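/* Editor's illustrative sketch, not part of the original source: bits 10-11
   of the x87 control word select the rounding mode, which is why the
   sequences above mask with 0x0c00 and then OR in 0x0400 (round down),
   0x0800 (round up) or 0x0c00 (truncate); 0x0020 masks the precision
   exception.  set_rounding is a hypothetical helper.  */
#include <stdio.h>

static unsigned short
set_rounding (unsigned short cw, unsigned rc /* 0..3 */)
{
  return (unsigned short) ((cw & ~0x0c00u) | (rc << 10));
}

int
main (void)
{
  unsigned short cw = 0x037f;   /* a typical default control word */
  printf ("truncate: 0x%04x\n", set_rounding (cw, 3));   /* 0x0f7f */
  printf ("floor:    0x%04x\n", set_rounding (cw, 1));   /* 0x077f */
  return 0;
}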
8849 /* Output code for INSN to convert a float to a signed int. OPERANDS
8850 are the insn operands. The output may be [HSD]Imode and the input
8851 operand may be [SDX]Fmode. */
8853 const char *
8854 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8856 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8857 int dimode_p = GET_MODE (operands[0]) == DImode;
8858 int round_mode = get_attr_i387_cw (insn);
8860 /* Jump through a hoop or two for DImode, since the hardware has no
8861 non-popping instruction. We used to do this a different way, but
8862 that was somewhat fragile and broke with post-reload splitters. */
8863 if ((dimode_p || fisttp) && !stack_top_dies)
8864 output_asm_insn ("fld\t%y1", operands);
8866 gcc_assert (STACK_TOP_P (operands[1]));
8867 gcc_assert (GET_CODE (operands[0]) == MEM);
8869 if (fisttp)
8870 output_asm_insn ("fisttp%z0\t%0", operands);
8871 else
8873 if (round_mode != I387_CW_ANY)
8874 output_asm_insn ("fldcw\t%3", operands);
8875 if (stack_top_dies || dimode_p)
8876 output_asm_insn ("fistp%z0\t%0", operands);
8877 else
8878 output_asm_insn ("fist%z0\t%0", operands);
8879 if (round_mode != I387_CW_ANY)
8880 output_asm_insn ("fldcw\t%2", operands);
8883 return "";
8886 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8887 have the values zero or one, indicates the ffreep insn's operand
8888 from the OPERANDS array. */
8890 static const char *
8891 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8893 if (TARGET_USE_FFREEP)
8894 #if HAVE_AS_IX86_FFREEP
8895 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8896 #else
8898 static char retval[] = ".word\t0xc_df";
8899 int regno = REGNO (operands[opno]);
8901 gcc_assert (FP_REGNO_P (regno));
8903 retval[9] = '0' + (regno - FIRST_STACK_REG);
8904 return retval;
8906 #endif
8908 return opno ? "fstp\t%y1" : "fstp\t%y0";
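/* Editor's illustrative sketch, not part of the original source: the
   ".word 0xc_df" fallback above patches the stack register number into the
   literal; the 16-bit value 0xC0DF + (i << 8), stored little-endian, gives
   the two bytes DF C0+i, the encoding of "ffreep %st(i)", for assemblers
   that do not know the mnemonic.  */
#include <stdio.h>

int
main (void)
{
  for (int i = 0; i < 8; i++)
    {
      unsigned word = 0xc0df + (unsigned) (i << 8);   /* ".word 0xc<i>df" */
      printf ("ffreep %%st(%d) -> bytes %02x %02x\n",
              i, word & 0xff, (word >> 8) & 0xff);
    }
  return 0;
}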
8912 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8913 should be used. UNORDERED_P is true when fucom should be used. */
8915 const char *
8916 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8918 int stack_top_dies;
8919 rtx cmp_op0, cmp_op1;
8920 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8922 if (eflags_p)
8924 cmp_op0 = operands[0];
8925 cmp_op1 = operands[1];
8927 else
8929 cmp_op0 = operands[1];
8930 cmp_op1 = operands[2];
8933 if (is_sse)
8935 if (GET_MODE (operands[0]) == SFmode)
8936 if (unordered_p)
8937 return "ucomiss\t{%1, %0|%0, %1}";
8938 else
8939 return "comiss\t{%1, %0|%0, %1}";
8940 else
8941 if (unordered_p)
8942 return "ucomisd\t{%1, %0|%0, %1}";
8943 else
8944 return "comisd\t{%1, %0|%0, %1}";
8947 gcc_assert (STACK_TOP_P (cmp_op0));
8949 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8951 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8953 if (stack_top_dies)
8955 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8956 return output_387_ffreep (operands, 1);
8958 else
8959 return "ftst\n\tfnstsw\t%0";
8962 if (STACK_REG_P (cmp_op1)
8963 && stack_top_dies
8964 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8965 && REGNO (cmp_op1) != FIRST_STACK_REG)
8967 /* If the top of the 387 stack dies, and the other operand
8968 is also a stack register that dies, then this must be a
8969 `fcompp' float compare.  */
8971 if (eflags_p)
8973 /* There is no double popping fcomi variant. Fortunately,
8974 eflags is immune from the fstp's cc clobbering. */
8975 if (unordered_p)
8976 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8977 else
8978 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8979 return output_387_ffreep (operands, 0);
8981 else
8983 if (unordered_p)
8984 return "fucompp\n\tfnstsw\t%0";
8985 else
8986 return "fcompp\n\tfnstsw\t%0";
8989 else
8991 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8993 static const char * const alt[16] =
8995 "fcom%z2\t%y2\n\tfnstsw\t%0",
8996 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8997 "fucom%z2\t%y2\n\tfnstsw\t%0",
8998 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9000 "ficom%z2\t%y2\n\tfnstsw\t%0",
9001 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9002 NULL,
9003 NULL,
9005 "fcomi\t{%y1, %0|%0, %y1}",
9006 "fcomip\t{%y1, %0|%0, %y1}",
9007 "fucomi\t{%y1, %0|%0, %y1}",
9008 "fucomip\t{%y1, %0|%0, %y1}",
9010 NULL,
9011 NULL,
9012 NULL,
9013 NULL
9016 int mask;
9017 const char *ret;
9019 mask = eflags_p << 3;
9020 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9021 mask |= unordered_p << 1;
9022 mask |= stack_top_dies;
9024 gcc_assert (mask < 16);
9025 ret = alt[mask];
9026 gcc_assert (ret);
9028 return ret;
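/* Editor's illustrative sketch, not part of the original source: the
   template is selected by the 4-bit index built exactly as in the code
   above, eflags_p<<3 | intmode<<2 | unordered_p<<1 | stack_top_dies.
   fp_compare_index is a hypothetical restatement of that computation.  */
#include <stdio.h>

static int
fp_compare_index (int eflags_p, int intmode, int unordered_p, int stack_top_dies)
{
  return (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies;
}

int
main (void)
{
  /* fucomip: eflags result, fp operand, unordered compare, st(0) dies.  */
  printf ("%d\n", fp_compare_index (1, 0, 1, 1));   /* prints 11 */
  return 0;
}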
9032 void
9033 ix86_output_addr_vec_elt (FILE *file, int value)
9035 const char *directive = ASM_LONG;
9037 #ifdef ASM_QUAD
9038 if (TARGET_64BIT)
9039 directive = ASM_QUAD;
9040 #else
9041 gcc_assert (!TARGET_64BIT);
9042 #endif
9044 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9047 void
9048 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9050 if (TARGET_64BIT)
9051 fprintf (file, "%s%s%d-%s%d\n",
9052 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9053 else if (HAVE_AS_GOTOFF_IN_DATA)
9054 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9055 #if TARGET_MACHO
9056 else if (TARGET_MACHO)
9058 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9059 machopic_output_function_base_name (file);
9060 fprintf(file, "\n");
9062 #endif
9063 else
9064 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9065 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9068 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9069 for the target. */
9071 void
9072 ix86_expand_clear (rtx dest)
9074 rtx tmp;
9076 /* We play register width games, which are only valid after reload. */
9077 gcc_assert (reload_completed);
9079 /* Avoid HImode and its attendant prefix byte. */
9080 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9081 dest = gen_rtx_REG (SImode, REGNO (dest));
9083 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9085 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9086 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9088 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9089 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9092 emit_insn (tmp);
9095 /* X is an unchanging MEM. If it is a constant pool reference, return
9096 the constant pool rtx, else NULL. */
9099 maybe_get_pool_constant (rtx x)
9101 x = ix86_delegitimize_address (XEXP (x, 0));
9103 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9104 return get_pool_constant (x);
9106 return NULL_RTX;
9109 void
9110 ix86_expand_move (enum machine_mode mode, rtx operands[])
9112 int strict = (reload_in_progress || reload_completed);
9113 rtx op0, op1;
9114 enum tls_model model;
9116 op0 = operands[0];
9117 op1 = operands[1];
9119 if (GET_CODE (op1) == SYMBOL_REF)
9121 model = SYMBOL_REF_TLS_MODEL (op1);
9122 if (model)
9124 op1 = legitimize_tls_address (op1, model, true);
9125 op1 = force_operand (op1, op0);
9126 if (op1 == op0)
9127 return;
9130 else if (GET_CODE (op1) == CONST
9131 && GET_CODE (XEXP (op1, 0)) == PLUS
9132 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9134 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9135 if (model)
9137 rtx addend = XEXP (XEXP (op1, 0), 1);
9138 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9139 op1 = force_operand (op1, NULL);
9140 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9141 op0, 1, OPTAB_DIRECT);
9142 if (op1 == op0)
9143 return;
9147 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9149 if (TARGET_MACHO && !TARGET_64BIT)
9151 #if TARGET_MACHO
9152 if (MACHOPIC_PURE)
9154 rtx temp = ((reload_in_progress
9155 || ((op0 && GET_CODE (op0) == REG)
9156 && mode == Pmode))
9157 ? op0 : gen_reg_rtx (Pmode));
9158 op1 = machopic_indirect_data_reference (op1, temp);
9159 op1 = machopic_legitimize_pic_address (op1, mode,
9160 temp == op1 ? 0 : temp);
9162 else if (MACHOPIC_INDIRECT)
9163 op1 = machopic_indirect_data_reference (op1, 0);
9164 if (op0 == op1)
9165 return;
9166 #endif
9168 else
9170 if (GET_CODE (op0) == MEM)
9171 op1 = force_reg (Pmode, op1);
9172 else
9173 op1 = legitimize_address (op1, op1, Pmode);
9176 else
9178 if (GET_CODE (op0) == MEM
9179 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9180 || !push_operand (op0, mode))
9181 && GET_CODE (op1) == MEM)
9182 op1 = force_reg (mode, op1);
9184 if (push_operand (op0, mode)
9185 && ! general_no_elim_operand (op1, mode))
9186 op1 = copy_to_mode_reg (mode, op1);
9188 /* Force large constants in 64-bit compilation into a register
9189 to get them CSEd.  */
9190 if (TARGET_64BIT && mode == DImode
9191 && immediate_operand (op1, mode)
9192 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9193 && !register_operand (op0, mode)
9194 && optimize && !reload_completed && !reload_in_progress)
9195 op1 = copy_to_mode_reg (mode, op1);
9197 if (FLOAT_MODE_P (mode))
9199 /* If we are loading a floating point constant into a register,
9200 force the value to memory now, since we'll get better code
9201 out of the back end.  */
9203 if (strict)
9205 else if (GET_CODE (op1) == CONST_DOUBLE)
9207 op1 = validize_mem (force_const_mem (mode, op1));
9208 if (!register_operand (op0, mode))
9210 rtx temp = gen_reg_rtx (mode);
9211 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9212 emit_move_insn (op0, temp);
9213 return;
9219 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9222 void
9223 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9225 rtx op0 = operands[0], op1 = operands[1];
9227 /* Force constants other than zero into memory.  We do not know how
9228 the instructions used to build constants modify the upper 64 bits
9229 of the register; once we have that information we may be able
9230 to handle some of them more efficiently.  */
9231 if ((reload_in_progress | reload_completed) == 0
9232 && register_operand (op0, mode)
9233 && CONSTANT_P (op1)
9234 && standard_sse_constant_p (op1) <= 0)
9235 op1 = validize_mem (force_const_mem (mode, op1));
9237 /* Make operand1 a register if it isn't already. */
9238 if (!no_new_pseudos
9239 && !register_operand (op0, mode)
9240 && !register_operand (op1, mode))
9242 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9243 return;
9246 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9249 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9250 straight to ix86_expand_vector_move. */
9252 void
9253 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9255 rtx op0, op1, m;
9257 op0 = operands[0];
9258 op1 = operands[1];
9260 if (MEM_P (op1))
9262 /* If we're optimizing for size, movups is the smallest. */
9263 if (optimize_size)
9265 op0 = gen_lowpart (V4SFmode, op0);
9266 op1 = gen_lowpart (V4SFmode, op1);
9267 emit_insn (gen_sse_movups (op0, op1));
9268 return;
9271 /* ??? If we have typed data, then it would appear that using
9272 movdqu is the only way to get unaligned data loaded with
9273 integer type. */
9274 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9276 op0 = gen_lowpart (V16QImode, op0);
9277 op1 = gen_lowpart (V16QImode, op1);
9278 emit_insn (gen_sse2_movdqu (op0, op1));
9279 return;
9282 if (TARGET_SSE2 && mode == V2DFmode)
9284 rtx zero;
9286 /* When SSE registers are split into halves, we can avoid
9287 writing to the top half twice. */
9288 if (TARGET_SSE_SPLIT_REGS)
9290 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9291 zero = op0;
9293 else
9295 /* ??? Not sure about the best option for the Intel chips.
9296 The following would seem to satisfy; the register is
9297 entirely cleared, breaking the dependency chain. We
9298 then store to the upper half, with a dependency depth
9299 of one. A rumor has it that Intel recommends two movsd
9300 followed by an unpacklpd, but this is unconfirmed. And
9301 given that the dependency depth of the unpacklpd would
9302 still be one, I'm not sure why this would be better. */
9303 zero = CONST0_RTX (V2DFmode);
9306 m = adjust_address (op1, DFmode, 0);
9307 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9308 m = adjust_address (op1, DFmode, 8);
9309 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9311 else
9313 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9314 emit_move_insn (op0, CONST0_RTX (mode));
9315 else
9316 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9318 if (mode != V4SFmode)
9319 op0 = gen_lowpart (V4SFmode, op0);
9320 m = adjust_address (op1, V2SFmode, 0);
9321 emit_insn (gen_sse_loadlps (op0, op0, m));
9322 m = adjust_address (op1, V2SFmode, 8);
9323 emit_insn (gen_sse_loadhps (op0, op0, m));
9326 else if (MEM_P (op0))
9328 /* If we're optimizing for size, movups is the smallest. */
9329 if (optimize_size)
9331 op0 = gen_lowpart (V4SFmode, op0);
9332 op1 = gen_lowpart (V4SFmode, op1);
9333 emit_insn (gen_sse_movups (op0, op1));
9334 return;
9337 /* ??? Similar to above, only less clear because of quote
9338 typeless stores unquote. */
9339 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9340 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9342 op0 = gen_lowpart (V16QImode, op0);
9343 op1 = gen_lowpart (V16QImode, op1);
9344 emit_insn (gen_sse2_movdqu (op0, op1));
9345 return;
9348 if (TARGET_SSE2 && mode == V2DFmode)
9350 m = adjust_address (op0, DFmode, 0);
9351 emit_insn (gen_sse2_storelpd (m, op1));
9352 m = adjust_address (op0, DFmode, 8);
9353 emit_insn (gen_sse2_storehpd (m, op1));
9355 else
9357 if (mode != V4SFmode)
9358 op1 = gen_lowpart (V4SFmode, op1);
9359 m = adjust_address (op0, V2SFmode, 0);
9360 emit_insn (gen_sse_storelps (m, op1));
9361 m = adjust_address (op0, V2SFmode, 8);
9362 emit_insn (gen_sse_storehps (m, op1));
9365 else
9366 gcc_unreachable ();
9369 /* Expand a push in MODE. This is some mode for which we do not support
9370 proper push instructions, at least from the registers that we expect
9371 the value to live in. */
9373 void
9374 ix86_expand_push (enum machine_mode mode, rtx x)
9376 rtx tmp;
9378 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9379 GEN_INT (-GET_MODE_SIZE (mode)),
9380 stack_pointer_rtx, 1, OPTAB_DIRECT);
9381 if (tmp != stack_pointer_rtx)
9382 emit_move_insn (stack_pointer_rtx, tmp);
9384 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9385 emit_move_insn (tmp, x);
9388 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9389 destination to use for the operation. If different from the true
9390 destination in operands[0], a copy operation will be required. */
9393 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9394 rtx operands[])
9396 int matching_memory;
9397 rtx src1, src2, dst;
9399 dst = operands[0];
9400 src1 = operands[1];
9401 src2 = operands[2];
9403 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9404 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9405 && (rtx_equal_p (dst, src2)
9406 || immediate_operand (src1, mode)))
9408 rtx temp = src1;
9409 src1 = src2;
9410 src2 = temp;
9413 /* If the destination is memory, and we do not have matching source
9414 operands, do things in registers. */
9415 matching_memory = 0;
9416 if (GET_CODE (dst) == MEM)
9418 if (rtx_equal_p (dst, src1))
9419 matching_memory = 1;
9420 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9421 && rtx_equal_p (dst, src2))
9422 matching_memory = 2;
9423 else
9424 dst = gen_reg_rtx (mode);
9427 /* Both source operands cannot be in memory. */
9428 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9430 if (matching_memory != 2)
9431 src2 = force_reg (mode, src2);
9432 else
9433 src1 = force_reg (mode, src1);
9436 /* If the operation is not commutable, source 1 cannot be a constant
9437 or non-matching memory. */
9438 if ((CONSTANT_P (src1)
9439 || (!matching_memory && GET_CODE (src1) == MEM))
9440 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9441 src1 = force_reg (mode, src1);
9443 src1 = operands[1] = src1;
9444 src2 = operands[2] = src2;
9445 return dst;
9448 /* Similarly, but assume that the destination has already been
9449 set up properly. */
9451 void
9452 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9453 enum machine_mode mode, rtx operands[])
9455 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9456 gcc_assert (dst == operands[0]);
9459 /* Attempt to expand a binary operator. Make the expansion closer to the
9460 actual machine than just general_operand, which would allow 3 separate
9461 memory references (one output, two input) in a single insn. */
9463 void
9464 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9465 rtx operands[])
9467 rtx src1, src2, dst, op, clob;
9469 dst = ix86_fixup_binary_operands (code, mode, operands);
9470 src1 = operands[1];
9471 src2 = operands[2];
9473 /* Emit the instruction. */
9475 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9476 if (reload_in_progress)
9478 /* Reload doesn't know about the flags register, and doesn't know that
9479 it doesn't want to clobber it. We can only do this with PLUS. */
9480 gcc_assert (code == PLUS);
9481 emit_insn (op);
9483 else
9485 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9486 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9489 /* Fix up the destination if needed. */
9490 if (dst != operands[0])
9491 emit_move_insn (operands[0], dst);
9494 /* Return TRUE or FALSE depending on whether the binary operator meets the
9495 appropriate constraints. */
9498 ix86_binary_operator_ok (enum rtx_code code,
9499 enum machine_mode mode ATTRIBUTE_UNUSED,
9500 rtx operands[3])
9502 /* Both source operands cannot be in memory. */
9503 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9504 return 0;
9505 /* If the operation is not commutable, source 1 cannot be a constant. */
9506 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9507 return 0;
9508 /* If the destination is memory, we must have a matching source operand. */
9509 if (GET_CODE (operands[0]) == MEM
9510 && ! (rtx_equal_p (operands[0], operands[1])
9511 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9512 && rtx_equal_p (operands[0], operands[2]))))
9513 return 0;
9514 /* If the operation is not commutable and the source 1 is memory, we must
9515 have a matching destination. */
9516 if (GET_CODE (operands[1]) == MEM
9517 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9518 && ! rtx_equal_p (operands[0], operands[1]))
9519 return 0;
9520 return 1;
9523 /* Attempt to expand a unary operator. Make the expansion closer to the
9524 actual machine than just general_operand, which would allow 2 separate
9525 memory references (one output, one input) in a single insn. */
9527 void
9528 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9529 rtx operands[])
9531 int matching_memory;
9532 rtx src, dst, op, clob;
9534 dst = operands[0];
9535 src = operands[1];
9537 /* If the destination is memory, and we do not have matching source
9538 operands, do things in registers. */
9539 matching_memory = 0;
9540 if (MEM_P (dst))
9542 if (rtx_equal_p (dst, src))
9543 matching_memory = 1;
9544 else
9545 dst = gen_reg_rtx (mode);
9548 /* When source operand is memory, destination must match. */
9549 if (MEM_P (src) && !matching_memory)
9550 src = force_reg (mode, src);
9552 /* Emit the instruction. */
9554 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9555 if (reload_in_progress || code == NOT)
9557 /* Reload doesn't know about the flags register, and doesn't know that
9558 it doesn't want to clobber it. */
9559 gcc_assert (code == NOT);
9560 emit_insn (op);
9562 else
9564 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9565 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9568 /* Fix up the destination if needed. */
9569 if (dst != operands[0])
9570 emit_move_insn (operands[0], dst);
9573 /* Return TRUE or FALSE depending on whether the unary operator meets the
9574 appropriate constraints. */
9577 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9578 enum machine_mode mode ATTRIBUTE_UNUSED,
9579 rtx operands[2] ATTRIBUTE_UNUSED)
9581 /* If one of the operands is memory, the source and destination must match. */
9582 if ((GET_CODE (operands[0]) == MEM
9583 || GET_CODE (operands[1]) == MEM)
9584 && ! rtx_equal_p (operands[0], operands[1]))
9585 return FALSE;
9586 return TRUE;
9589 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9590 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9591 true, then replicate the mask for all elements of the vector register.
9592 If INVERT is true, then create a mask excluding the sign bit. */
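/* Illustrative example (assuming a 64-bit HOST_WIDE_INT): for DFmode with
   VECT false this returns a V2DF constant whose element bit patterns are
   { 0x8000000000000000, 0 }; with INVERT true they are
   { 0x7fffffffffffffff, 0 }.  */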
9595 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9597 enum machine_mode vec_mode;
9598 HOST_WIDE_INT hi, lo;
9599 int shift = 63;
9600 rtvec v;
9601 rtx mask;
9603 /* Find the sign bit, sign extended to 2*HWI. */
9604 if (mode == SFmode)
9605 lo = 0x80000000, hi = lo < 0;
9606 else if (HOST_BITS_PER_WIDE_INT >= 64)
9607 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9608 else
9609 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9611 if (invert)
9612 lo = ~lo, hi = ~hi;
9614 /* Force this value into the low part of a fp vector constant. */
9615 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9616 mask = gen_lowpart (mode, mask);
9618 if (mode == SFmode)
9620 if (vect)
9621 v = gen_rtvec (4, mask, mask, mask, mask);
9622 else
9623 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9624 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9625 vec_mode = V4SFmode;
9627 else
9629 if (vect)
9630 v = gen_rtvec (2, mask, mask);
9631 else
9632 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9633 vec_mode = V2DFmode;
9636 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9639 /* Generate code for floating point ABS or NEG. */
9641 void
9642 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9643 rtx operands[])
9645 rtx mask, set, use, clob, dst, src;
9646 bool matching_memory;
9647 bool use_sse = false;
9648 bool vector_mode = VECTOR_MODE_P (mode);
9649 enum machine_mode elt_mode = mode;
9651 if (vector_mode)
9653 elt_mode = GET_MODE_INNER (mode);
9654 use_sse = true;
9656 else if (TARGET_SSE_MATH)
9657 use_sse = SSE_FLOAT_MODE_P (mode);
9659 /* NEG and ABS performed with SSE use bitwise mask operations.
9660 Create the appropriate mask now. */
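/* E.g. (illustrative): a DFmode negation becomes an XOR with the
   sign-bit mask (roughly xorpd), while fabs becomes an AND with the
   inverted mask (roughly andpd).  */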
9661 if (use_sse)
9662 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9663 else
9664 mask = NULL_RTX;
9666 dst = operands[0];
9667 src = operands[1];
9669 /* If the destination is memory, and we don't have matching source
9670 operands or we're using the x87, do things in registers. */
9671 matching_memory = false;
9672 if (MEM_P (dst))
9674 if (use_sse && rtx_equal_p (dst, src))
9675 matching_memory = true;
9676 else
9677 dst = gen_reg_rtx (mode);
9679 if (MEM_P (src) && !matching_memory)
9680 src = force_reg (mode, src);
9682 if (vector_mode)
9684 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9685 set = gen_rtx_SET (VOIDmode, dst, set);
9686 emit_insn (set);
9688 else
9690 set = gen_rtx_fmt_e (code, mode, src);
9691 set = gen_rtx_SET (VOIDmode, dst, set);
9692 if (mask)
9694 use = gen_rtx_USE (VOIDmode, mask);
9695 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9696 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9697 gen_rtvec (3, set, use, clob)));
9699 else
9700 emit_insn (set);
9703 if (dst != operands[0])
9704 emit_move_insn (operands[0], dst);
9707 /* Expand a copysign operation. Special case operand 0 being a constant. */
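/* That is, the result takes its magnitude from operands[1] and its sign
   from operands[2].  With SSE this is done purely with bit masks; as an
   illustrative sketch, dest = (op0 & ~signmask) | (op1 & signmask).  */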
9709 void
9710 ix86_expand_copysign (rtx operands[])
9712 enum machine_mode mode, vmode;
9713 rtx dest, op0, op1, mask, nmask;
9715 dest = operands[0];
9716 op0 = operands[1];
9717 op1 = operands[2];
9719 mode = GET_MODE (dest);
9720 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9722 if (GET_CODE (op0) == CONST_DOUBLE)
9724 rtvec v;
9726 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9727 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9729 if (op0 == CONST0_RTX (mode))
9730 op0 = CONST0_RTX (vmode);
9731 else
9733 if (mode == SFmode)
9734 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9735 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9736 else
9737 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9738 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9741 mask = ix86_build_signbit_mask (mode, 0, 0);
9743 if (mode == SFmode)
9744 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9745 else
9746 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9748 else
9750 nmask = ix86_build_signbit_mask (mode, 0, 1);
9751 mask = ix86_build_signbit_mask (mode, 0, 0);
9753 if (mode == SFmode)
9754 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9755 else
9756 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9760 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9761 be a constant, and so has already been expanded into a vector constant. */
9763 void
9764 ix86_split_copysign_const (rtx operands[])
9766 enum machine_mode mode, vmode;
9767 rtx dest, op0, op1, mask, x;
9769 dest = operands[0];
9770 op0 = operands[1];
9771 op1 = operands[2];
9772 mask = operands[3];
9774 mode = GET_MODE (dest);
9775 vmode = GET_MODE (mask);
9777 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9778 x = gen_rtx_AND (vmode, dest, mask);
9779 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9781 if (op0 != CONST0_RTX (vmode))
9783 x = gen_rtx_IOR (vmode, dest, op0);
9784 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9788 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9789 so we have to do two masks. */
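/* Illustrative data flow: dest = (op0 & nmask) | (op1 & mask), where MASK
   selects only the sign bit and NMASK everything else; the alternatives
   below differ mainly in which register is reused as the scratch.  */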
9791 void
9792 ix86_split_copysign_var (rtx operands[])
9794 enum machine_mode mode, vmode;
9795 rtx dest, scratch, op0, op1, mask, nmask, x;
9797 dest = operands[0];
9798 scratch = operands[1];
9799 op0 = operands[2];
9800 op1 = operands[3];
9801 nmask = operands[4];
9802 mask = operands[5];
9804 mode = GET_MODE (dest);
9805 vmode = GET_MODE (mask);
9807 if (rtx_equal_p (op0, op1))
9809 /* Shouldn't happen often (it's useless, obviously), but when it does
9810 we'd generate incorrect code if we continue below. */
9811 emit_move_insn (dest, op0);
9812 return;
9815 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9817 gcc_assert (REGNO (op1) == REGNO (scratch));
9819 x = gen_rtx_AND (vmode, scratch, mask);
9820 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9822 dest = mask;
9823 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9824 x = gen_rtx_NOT (vmode, dest);
9825 x = gen_rtx_AND (vmode, x, op0);
9826 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9828 else
9830 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9832 x = gen_rtx_AND (vmode, scratch, mask);
9834 else /* alternative 2,4 */
9836 gcc_assert (REGNO (mask) == REGNO (scratch));
9837 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9838 x = gen_rtx_AND (vmode, scratch, op1);
9840 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9842 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9844 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9845 x = gen_rtx_AND (vmode, dest, nmask);
9847 else /* alternative 3,4 */
9849 gcc_assert (REGNO (nmask) == REGNO (dest));
9850 dest = nmask;
9851 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9852 x = gen_rtx_AND (vmode, dest, op0);
9854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9857 x = gen_rtx_IOR (vmode, dest, scratch);
9858 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9861 /* Return TRUE or FALSE depending on whether the first SET in INSN
9862 has source and destination with matching CC modes, and that the
9863 CC mode is at least as constrained as REQ_MODE. */
9866 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9868 rtx set;
9869 enum machine_mode set_mode;
9871 set = PATTERN (insn);
9872 if (GET_CODE (set) == PARALLEL)
9873 set = XVECEXP (set, 0, 0);
9874 gcc_assert (GET_CODE (set) == SET);
9875 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9877 set_mode = GET_MODE (SET_DEST (set));
9878 switch (set_mode)
9880 case CCNOmode:
9881 if (req_mode != CCNOmode
9882 && (req_mode != CCmode
9883 || XEXP (SET_SRC (set), 1) != const0_rtx))
9884 return 0;
9885 break;
9886 case CCmode:
9887 if (req_mode == CCGCmode)
9888 return 0;
9889 /* FALLTHRU */
9890 case CCGCmode:
9891 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9892 return 0;
9893 /* FALLTHRU */
9894 case CCGOCmode:
9895 if (req_mode == CCZmode)
9896 return 0;
9897 /* FALLTHRU */
9898 case CCZmode:
9899 break;
9901 default:
9902 gcc_unreachable ();
9905 return (GET_MODE (SET_SRC (set)) == set_mode);
9908 /* Generate insn patterns to do an integer compare of OPERANDS. */
9910 static rtx
9911 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9913 enum machine_mode cmpmode;
9914 rtx tmp, flags;
9916 cmpmode = SELECT_CC_MODE (code, op0, op1);
9917 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9919 /* This is very simple, but making the interface the same as in the
9920 FP case makes the rest of the code easier. */
9921 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9922 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9924 /* Return the test that should be put into the flags user, i.e.
9925 the bcc, scc, or cmov instruction. */
9926 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9929 /* Figure out whether to use ordered or unordered fp comparisons.
9930 Return the appropriate mode to use. */
9932 enum machine_mode
9933 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9935 /* ??? In order to make all comparisons reversible, we do all comparisons
9936 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9937 all forms of trapping and nontrapping comparisons, we can make inequality
9938 comparisons trapping again, since that results in better code when using
9939 FCOM based compares. */
9940 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9943 enum machine_mode
9944 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9946 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9947 return ix86_fp_compare_mode (code);
9948 switch (code)
9950 /* Only zero flag is needed. */
9951 case EQ: /* ZF=0 */
9952 case NE: /* ZF!=0 */
9953 return CCZmode;
9954 /* Codes needing carry flag. */
9955 case GEU: /* CF=0 */
9956 case GTU: /* CF=0 & ZF=0 */
9957 case LTU: /* CF=1 */
9958 case LEU: /* CF=1 | ZF=1 */
9959 return CCmode;
9960 /* Codes possibly doable only with sign flag when
9961 comparing against zero. */
9962 case GE: /* SF=OF or SF=0 */
9963 case LT: /* SF<>OF or SF=1 */
9964 if (op1 == const0_rtx)
9965 return CCGOCmode;
9966 else
9967 /* For other cases the Carry flag is not required. */
9968 return CCGCmode;
9969 /* Codes doable only with sign flag when comparing
9970 against zero, but for which we lack a jump instruction,
9971 so we need to use relational tests against overflow,
9972 which thus needs to be zero. */
9973 case GT: /* ZF=0 & SF=OF */
9974 case LE: /* ZF=1 | SF<>OF */
9975 if (op1 == const0_rtx)
9976 return CCNOmode;
9977 else
9978 return CCGCmode;
9979 /* The strcmp pattern does (use flags) and combine may ask us for the proper
9980 mode. */
9981 case USE:
9982 return CCmode;
9983 default:
9984 gcc_unreachable ();
9988 /* Return the fixed registers used for condition codes. */
9990 static bool
9991 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9993 *p1 = FLAGS_REG;
9994 *p2 = FPSR_REG;
9995 return true;
9998 /* If two condition code modes are compatible, return a condition code
9999 mode which is compatible with both. Otherwise, return
10000 VOIDmode. */
10002 static enum machine_mode
10003 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10005 if (m1 == m2)
10006 return m1;
10008 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10009 return VOIDmode;
10011 if ((m1 == CCGCmode && m2 == CCGOCmode)
10012 || (m1 == CCGOCmode && m2 == CCGCmode))
10013 return CCGCmode;
10015 switch (m1)
10017 default:
10018 gcc_unreachable ();
10020 case CCmode:
10021 case CCGCmode:
10022 case CCGOCmode:
10023 case CCNOmode:
10024 case CCZmode:
10025 switch (m2)
10027 default:
10028 return VOIDmode;
10030 case CCmode:
10031 case CCGCmode:
10032 case CCGOCmode:
10033 case CCNOmode:
10034 case CCZmode:
10035 return CCmode;
10038 case CCFPmode:
10039 case CCFPUmode:
10040 /* These are only compatible with themselves, which we already
10041 checked above. */
10042 return VOIDmode;
10046 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10049 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10051 enum rtx_code swapped_code = swap_condition (code);
10052 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10053 || (ix86_fp_comparison_cost (swapped_code)
10054 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10057 /* Swap, force into registers, or otherwise massage the two operands
10058 to a fp comparison. The operands are updated in place; the new
10059 comparison code is returned. */
10061 static enum rtx_code
10062 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10064 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10065 rtx op0 = *pop0, op1 = *pop1;
10066 enum machine_mode op_mode = GET_MODE (op0);
10067 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10069 /* All of the unordered compare instructions only work on registers.
10070 The same is true of the fcomi compare instructions. The XFmode
10071 compare instructions require registers except when comparing
10072 against zero or when converting operand 1 from fixed point to
10073 floating point. */
10075 if (!is_sse
10076 && (fpcmp_mode == CCFPUmode
10077 || (op_mode == XFmode
10078 && ! (standard_80387_constant_p (op0) == 1
10079 || standard_80387_constant_p (op1) == 1)
10080 && GET_CODE (op1) != FLOAT)
10081 || ix86_use_fcomi_compare (code)))
10083 op0 = force_reg (op_mode, op0);
10084 op1 = force_reg (op_mode, op1);
10086 else
10088 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10089 things around if they appear profitable, otherwise force op0
10090 into a register. */
10092 if (standard_80387_constant_p (op0) == 0
10093 || (GET_CODE (op0) == MEM
10094 && ! (standard_80387_constant_p (op1) == 0
10095 || GET_CODE (op1) == MEM)))
10097 rtx tmp;
10098 tmp = op0, op0 = op1, op1 = tmp;
10099 code = swap_condition (code);
10102 if (GET_CODE (op0) != REG)
10103 op0 = force_reg (op_mode, op0);
10105 if (CONSTANT_P (op1))
10107 int tmp = standard_80387_constant_p (op1);
10108 if (tmp == 0)
10109 op1 = validize_mem (force_const_mem (op_mode, op1));
10110 else if (tmp == 1)
10112 if (TARGET_CMOVE)
10113 op1 = force_reg (op_mode, op1);
10115 else
10116 op1 = force_reg (op_mode, op1);
10120 /* Try to rearrange the comparison to make it cheaper. */
10121 if (ix86_fp_comparison_cost (code)
10122 > ix86_fp_comparison_cost (swap_condition (code))
10123 && (GET_CODE (op1) == REG || !no_new_pseudos))
10125 rtx tmp;
10126 tmp = op0, op0 = op1, op1 = tmp;
10127 code = swap_condition (code);
10128 if (GET_CODE (op0) != REG)
10129 op0 = force_reg (op_mode, op0);
10132 *pop0 = op0;
10133 *pop1 = op1;
10134 return code;
10137 /* Convert comparison codes we use to represent FP comparison to integer
10138 code that will result in proper branch. Return UNKNOWN if no such code
10139 is available. */
10141 enum rtx_code
10142 ix86_fp_compare_code_to_integer (enum rtx_code code)
10144 switch (code)
10146 case GT:
10147 return GTU;
10148 case GE:
10149 return GEU;
10150 case ORDERED:
10151 case UNORDERED:
10152 return code;
10153 break;
10154 case UNEQ:
10155 return EQ;
10156 break;
10157 case UNLT:
10158 return LTU;
10159 break;
10160 case UNLE:
10161 return LEU;
10162 break;
10163 case LTGT:
10164 return NE;
10165 break;
10166 default:
10167 return UNKNOWN;
10171 /* Split comparison code CODE into comparisons we can do using branch
10172 instructions. BYPASS_CODE is comparison code for branch that will
10173 branch around FIRST_CODE and SECOND_CODE. If some of branches
10174 is not required, set value to UNKNOWN.
10175 We never require more than two branches. */
10177 void
10178 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10179 enum rtx_code *first_code,
10180 enum rtx_code *second_code)
10182 *first_code = code;
10183 *bypass_code = UNKNOWN;
10184 *second_code = UNKNOWN;
10186 /* The fcomi comparison sets flags as follows:
10188 cmp ZF PF CF
10189 > 0 0 0
10190 < 0 0 1
10191 = 1 0 0
10192 un 1 1 1 */
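/* So, for example, LT cannot be tested directly when NaNs must be
   honored: CF is also set for unordered operands, which is why the LT
   case below uses UNLT as the primary test plus an UNORDERED bypass
   branch around it.  */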
10194 switch (code)
10196 case GT: /* GTU - CF=0 & ZF=0 */
10197 case GE: /* GEU - CF=0 */
10198 case ORDERED: /* PF=0 */
10199 case UNORDERED: /* PF=1 */
10200 case UNEQ: /* EQ - ZF=1 */
10201 case UNLT: /* LTU - CF=1 */
10202 case UNLE: /* LEU - CF=1 | ZF=1 */
10203 case LTGT: /* EQ - ZF=0 */
10204 break;
10205 case LT: /* LTU - CF=1 - fails on unordered */
10206 *first_code = UNLT;
10207 *bypass_code = UNORDERED;
10208 break;
10209 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10210 *first_code = UNLE;
10211 *bypass_code = UNORDERED;
10212 break;
10213 case EQ: /* EQ - ZF=1 - fails on unordered */
10214 *first_code = UNEQ;
10215 *bypass_code = UNORDERED;
10216 break;
10217 case NE: /* NE - ZF=0 - fails on unordered */
10218 *first_code = LTGT;
10219 *second_code = UNORDERED;
10220 break;
10221 case UNGE: /* GEU - CF=0 - fails on unordered */
10222 *first_code = GE;
10223 *second_code = UNORDERED;
10224 break;
10225 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10226 *first_code = GT;
10227 *second_code = UNORDERED;
10228 break;
10229 default:
10230 gcc_unreachable ();
10232 if (!TARGET_IEEE_FP)
10234 *second_code = UNKNOWN;
10235 *bypass_code = UNKNOWN;
10239 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10240 All of the following functions use the number of instructions as the cost metric.
10241 In the future this should be tweaked to compute bytes for optimize_size and
10242 to take into account the performance of various instructions on various CPUs. */
10243 static int
10244 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10246 if (!TARGET_IEEE_FP)
10247 return 4;
10248 /* The cost of code output by ix86_expand_fp_compare. */
10249 switch (code)
10251 case UNLE:
10252 case UNLT:
10253 case LTGT:
10254 case GT:
10255 case GE:
10256 case UNORDERED:
10257 case ORDERED:
10258 case UNEQ:
10259 return 4;
10260 break;
10261 case LT:
10262 case NE:
10263 case EQ:
10264 case UNGE:
10265 return 5;
10266 break;
10267 case LE:
10268 case UNGT:
10269 return 6;
10270 break;
10271 default:
10272 gcc_unreachable ();
10276 /* Return cost of comparison done using fcomi operation.
10277 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10278 static int
10279 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10281 enum rtx_code bypass_code, first_code, second_code;
10282 /* Return arbitrarily high cost when instruction is not supported - this
10283 prevents gcc from using it. */
10284 if (!TARGET_CMOVE)
10285 return 1024;
10286 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10287 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10290 /* Return cost of comparison done using sahf operation.
10291 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10292 static int
10293 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10295 enum rtx_code bypass_code, first_code, second_code;
10296 /* Return arbitrarily high cost when instruction is not preferred - this
10297 prevents gcc from using it. */
10298 if (!TARGET_USE_SAHF && !optimize_size)
10299 return 1024;
10300 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10301 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10304 /* Compute cost of the comparison done using any method.
10305 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10306 static int
10307 ix86_fp_comparison_cost (enum rtx_code code)
10309 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10310 int min;
10312 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10313 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10315 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10316 if (min > sahf_cost)
10317 min = sahf_cost;
10318 if (min > fcomi_cost)
10319 min = fcomi_cost;
10320 return min;
10323 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10325 static rtx
10326 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10327 rtx *second_test, rtx *bypass_test)
10329 enum machine_mode fpcmp_mode, intcmp_mode;
10330 rtx tmp, tmp2;
10331 int cost = ix86_fp_comparison_cost (code);
10332 enum rtx_code bypass_code, first_code, second_code;
10334 fpcmp_mode = ix86_fp_compare_mode (code);
10335 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10337 if (second_test)
10338 *second_test = NULL_RTX;
10339 if (bypass_test)
10340 *bypass_test = NULL_RTX;
10342 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10344 /* Do fcomi/sahf based test when profitable. */
10345 if ((bypass_code == UNKNOWN || bypass_test)
10346 && (second_code == UNKNOWN || second_test)
10347 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10349 if (TARGET_CMOVE)
10351 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10352 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10353 tmp);
10354 emit_insn (tmp);
10356 else
10358 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10359 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10360 if (!scratch)
10361 scratch = gen_reg_rtx (HImode);
10362 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10363 emit_insn (gen_x86_sahf_1 (scratch));
10366 /* The FP codes work out to act like unsigned. */
10367 intcmp_mode = fpcmp_mode;
10368 code = first_code;
10369 if (bypass_code != UNKNOWN)
10370 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10371 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10372 const0_rtx);
10373 if (second_code != UNKNOWN)
10374 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10375 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10376 const0_rtx);
10378 else
10380 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10381 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10382 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10383 if (!scratch)
10384 scratch = gen_reg_rtx (HImode);
10385 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10387 /* In the unordered case, we have to check C2 for NaN's, which
10388 doesn't happen to work out to anything nice combination-wise.
10389 So do some bit twiddling on the value we've got in AH to come
10390 up with an appropriate set of condition codes. */
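/* After fnstsw, AH holds C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in
   bit 6 (0x40), so e.g. the 0x45 masks below test C3|C2|C0.  As a rough
   illustration, the non-IEEE GT case ends up as
       test $0x45, %ah ; je <taken>
   since a greater-than result leaves all three bits clear.  */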
10392 intcmp_mode = CCNOmode;
10393 switch (code)
10395 case GT:
10396 case UNGT:
10397 if (code == GT || !TARGET_IEEE_FP)
10399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10400 code = EQ;
10402 else
10404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10405 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10406 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10407 intcmp_mode = CCmode;
10408 code = GEU;
10410 break;
10411 case LT:
10412 case UNLT:
10413 if (code == LT && TARGET_IEEE_FP)
10415 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10416 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10417 intcmp_mode = CCmode;
10418 code = EQ;
10420 else
10422 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10423 code = NE;
10425 break;
10426 case GE:
10427 case UNGE:
10428 if (code == GE || !TARGET_IEEE_FP)
10430 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10431 code = EQ;
10433 else
10435 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10436 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10437 GEN_INT (0x01)));
10438 code = NE;
10440 break;
10441 case LE:
10442 case UNLE:
10443 if (code == LE && TARGET_IEEE_FP)
10445 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10446 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10447 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10448 intcmp_mode = CCmode;
10449 code = LTU;
10451 else
10453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10454 code = NE;
10456 break;
10457 case EQ:
10458 case UNEQ:
10459 if (code == EQ && TARGET_IEEE_FP)
10461 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10462 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10463 intcmp_mode = CCmode;
10464 code = EQ;
10466 else
10468 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10469 code = NE;
10470 break;
10472 break;
10473 case NE:
10474 case LTGT:
10475 if (code == NE && TARGET_IEEE_FP)
10477 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10478 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10479 GEN_INT (0x40)));
10480 code = NE;
10482 else
10484 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10485 code = EQ;
10487 break;
10489 case UNORDERED:
10490 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10491 code = NE;
10492 break;
10493 case ORDERED:
10494 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10495 code = EQ;
10496 break;
10498 default:
10499 gcc_unreachable ();
10503 /* Return the test that should be put into the flags user, i.e.
10504 the bcc, scc, or cmov instruction. */
10505 return gen_rtx_fmt_ee (code, VOIDmode,
10506 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10507 const0_rtx);
10511 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10513 rtx op0, op1, ret;
10514 op0 = ix86_compare_op0;
10515 op1 = ix86_compare_op1;
10517 if (second_test)
10518 *second_test = NULL_RTX;
10519 if (bypass_test)
10520 *bypass_test = NULL_RTX;
10522 if (ix86_compare_emitted)
10524 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10525 ix86_compare_emitted = NULL_RTX;
10527 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10528 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10529 second_test, bypass_test);
10530 else
10531 ret = ix86_expand_int_compare (code, op0, op1);
10533 return ret;
10536 /* Return true if the CODE will result in a nontrivial jump sequence. */
10537 bool
10538 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10540 enum rtx_code bypass_code, first_code, second_code;
10541 if (!TARGET_CMOVE)
10542 return true;
10543 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10544 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10547 void
10548 ix86_expand_branch (enum rtx_code code, rtx label)
10550 rtx tmp;
10552 /* If we have emitted a compare insn, go straight to simple.
10553 ix86_expand_compare won't emit anything if ix86_compare_emitted
10554 is non NULL. */
10555 if (ix86_compare_emitted)
10556 goto simple;
10558 switch (GET_MODE (ix86_compare_op0))
10560 case QImode:
10561 case HImode:
10562 case SImode:
10563 simple:
10564 tmp = ix86_expand_compare (code, NULL, NULL);
10565 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10566 gen_rtx_LABEL_REF (VOIDmode, label),
10567 pc_rtx);
10568 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10569 return;
10571 case SFmode:
10572 case DFmode:
10573 case XFmode:
10575 rtvec vec;
10576 int use_fcomi;
10577 enum rtx_code bypass_code, first_code, second_code;
10579 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10580 &ix86_compare_op1);
10582 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10584 /* Check whether we will use the natural sequence with one jump. If
10585 so, we can expand the jump early. Otherwise delay expansion by
10586 creating a compound insn so as not to confuse the optimizers. */
10587 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10588 && TARGET_CMOVE)
10590 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10591 gen_rtx_LABEL_REF (VOIDmode, label),
10592 pc_rtx, NULL_RTX, NULL_RTX);
10594 else
10596 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10597 ix86_compare_op0, ix86_compare_op1);
10598 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10599 gen_rtx_LABEL_REF (VOIDmode, label),
10600 pc_rtx);
10601 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10603 use_fcomi = ix86_use_fcomi_compare (code);
10604 vec = rtvec_alloc (3 + !use_fcomi);
10605 RTVEC_ELT (vec, 0) = tmp;
10606 RTVEC_ELT (vec, 1)
10607 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10608 RTVEC_ELT (vec, 2)
10609 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10610 if (! use_fcomi)
10611 RTVEC_ELT (vec, 3)
10612 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10614 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10616 return;
10619 case DImode:
10620 if (TARGET_64BIT)
10621 goto simple;
10622 case TImode:
10623 /* Expand DImode branch into multiple compare+branch. */
10625 rtx lo[2], hi[2], label2;
10626 enum rtx_code code1, code2, code3;
10627 enum machine_mode submode;
10629 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10631 tmp = ix86_compare_op0;
10632 ix86_compare_op0 = ix86_compare_op1;
10633 ix86_compare_op1 = tmp;
10634 code = swap_condition (code);
10636 if (GET_MODE (ix86_compare_op0) == DImode)
10638 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10639 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10640 submode = SImode;
10642 else
10644 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10645 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10646 submode = DImode;
10649 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10650 avoid two branches. This costs one extra insn, so disable when
10651 optimizing for size. */
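/* Illustrative shape of the code generated for EQ (register choice and
   operand order are up to later passes):
       xorl  hi1, hi0
       xorl  lo1, lo0
       orl   lo0, hi0
       jz    label  */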
10653 if ((code == EQ || code == NE)
10654 && (!optimize_size
10655 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10657 rtx xor0, xor1;
10659 xor1 = hi[0];
10660 if (hi[1] != const0_rtx)
10661 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10662 NULL_RTX, 0, OPTAB_WIDEN);
10664 xor0 = lo[0];
10665 if (lo[1] != const0_rtx)
10666 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10667 NULL_RTX, 0, OPTAB_WIDEN);
10669 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10670 NULL_RTX, 0, OPTAB_WIDEN);
10672 ix86_compare_op0 = tmp;
10673 ix86_compare_op1 = const0_rtx;
10674 ix86_expand_branch (code, label);
10675 return;
10678 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10679 op1 is a constant and the low word is zero, then we can just
10680 examine the high word. */
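/* For example (illustrative, 32-bit target): an unsigned test
   a < 0x300000000ULL has a zero low word in the constant, so it reduces
   to comparing only the high words, hi(a) < 3.  */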
10682 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10683 switch (code)
10685 case LT: case LTU: case GE: case GEU:
10686 ix86_compare_op0 = hi[0];
10687 ix86_compare_op1 = hi[1];
10688 ix86_expand_branch (code, label);
10689 return;
10690 default:
10691 break;
10694 /* Otherwise, we need two or three jumps. */
10696 label2 = gen_label_rtx ();
10698 code1 = code;
10699 code2 = swap_condition (code);
10700 code3 = unsigned_condition (code);
10702 switch (code)
10704 case LT: case GT: case LTU: case GTU:
10705 break;
10707 case LE: code1 = LT; code2 = GT; break;
10708 case GE: code1 = GT; code2 = LT; break;
10709 case LEU: code1 = LTU; code2 = GTU; break;
10710 case GEU: code1 = GTU; code2 = LTU; break;
10712 case EQ: code1 = UNKNOWN; code2 = NE; break;
10713 case NE: code2 = UNKNOWN; break;
10715 default:
10716 gcc_unreachable ();
10720 * a < b =>
10721 * if (hi(a) < hi(b)) goto true;
10722 * if (hi(a) > hi(b)) goto false;
10723 * if (lo(a) < lo(b)) goto true;
10724 * false:
10727 ix86_compare_op0 = hi[0];
10728 ix86_compare_op1 = hi[1];
10730 if (code1 != UNKNOWN)
10731 ix86_expand_branch (code1, label);
10732 if (code2 != UNKNOWN)
10733 ix86_expand_branch (code2, label2);
10735 ix86_compare_op0 = lo[0];
10736 ix86_compare_op1 = lo[1];
10737 ix86_expand_branch (code3, label);
10739 if (code2 != UNKNOWN)
10740 emit_label (label2);
10741 return;
10744 default:
10745 gcc_unreachable ();
10749 /* Split branch based on floating point condition. */
10750 void
10751 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10752 rtx target1, rtx target2, rtx tmp, rtx pushed)
10754 rtx second, bypass;
10755 rtx label = NULL_RTX;
10756 rtx condition;
10757 int bypass_probability = -1, second_probability = -1, probability = -1;
10758 rtx i;
10760 if (target2 != pc_rtx)
10762 rtx tmp = target2;
10763 code = reverse_condition_maybe_unordered (code);
10764 target2 = target1;
10765 target1 = tmp;
10768 condition = ix86_expand_fp_compare (code, op1, op2,
10769 tmp, &second, &bypass);
10771 /* Remove pushed operand from stack. */
10772 if (pushed)
10773 ix86_free_from_memory (GET_MODE (pushed));
10775 if (split_branch_probability >= 0)
10777 /* Distribute the probabilities across the jumps.
10778 Assume that BYPASS and SECOND always test
10779 for UNORDERED. */
10780 probability = split_branch_probability;
10782 /* A value of 1 is low enough that there is no need for the probability
10783 to be updated. Later we may run some experiments and see
10784 if unordered values are more frequent in practice. */
10785 if (bypass)
10786 bypass_probability = 1;
10787 if (second)
10788 second_probability = 1;
10790 if (bypass != NULL_RTX)
10792 label = gen_label_rtx ();
10793 i = emit_jump_insn (gen_rtx_SET
10794 (VOIDmode, pc_rtx,
10795 gen_rtx_IF_THEN_ELSE (VOIDmode,
10796 bypass,
10797 gen_rtx_LABEL_REF (VOIDmode,
10798 label),
10799 pc_rtx)));
10800 if (bypass_probability >= 0)
10801 REG_NOTES (i)
10802 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10803 GEN_INT (bypass_probability),
10804 REG_NOTES (i));
10806 i = emit_jump_insn (gen_rtx_SET
10807 (VOIDmode, pc_rtx,
10808 gen_rtx_IF_THEN_ELSE (VOIDmode,
10809 condition, target1, target2)));
10810 if (probability >= 0)
10811 REG_NOTES (i)
10812 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10813 GEN_INT (probability),
10814 REG_NOTES (i));
10815 if (second != NULL_RTX)
10817 i = emit_jump_insn (gen_rtx_SET
10818 (VOIDmode, pc_rtx,
10819 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10820 target2)));
10821 if (second_probability >= 0)
10822 REG_NOTES (i)
10823 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10824 GEN_INT (second_probability),
10825 REG_NOTES (i));
10827 if (label != NULL_RTX)
10828 emit_label (label);
10832 ix86_expand_setcc (enum rtx_code code, rtx dest)
10834 rtx ret, tmp, tmpreg, equiv;
10835 rtx second_test, bypass_test;
10837 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10838 return 0; /* FAIL */
10840 gcc_assert (GET_MODE (dest) == QImode);
10842 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10843 PUT_MODE (ret, QImode);
10845 tmp = dest;
10846 tmpreg = dest;
10848 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10849 if (bypass_test || second_test)
10851 rtx test = second_test;
10852 int bypass = 0;
10853 rtx tmp2 = gen_reg_rtx (QImode);
10854 if (bypass_test)
10856 gcc_assert (!second_test);
10857 test = bypass_test;
10858 bypass = 1;
10859 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10861 PUT_MODE (test, QImode);
10862 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10864 if (bypass)
10865 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10866 else
10867 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10870 /* Attach a REG_EQUAL note describing the comparison result. */
10871 if (ix86_compare_op0 && ix86_compare_op1)
10873 equiv = simplify_gen_relational (code, QImode,
10874 GET_MODE (ix86_compare_op0),
10875 ix86_compare_op0, ix86_compare_op1);
10876 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10879 return 1; /* DONE */
10882 /* Expand comparison setting or clearing carry flag. Return true when
10883 successful and set pop for the operation. */
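/* For example (illustrative): an unsigned a < b comparison leaves
   CF = (a < b), so a caller can follow the compare with
   sbb dest, dest to obtain dest = (a < b) ? -1 : 0, as the sbb-based
   sequences further below do.  */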
10884 static bool
10885 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10887 enum machine_mode mode =
10888 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10890 /* Do not handle DImode compares that go through the special path. Also we can't
10891 deal with FP compares yet. This is possible to add. */
10892 if (mode == (TARGET_64BIT ? TImode : DImode))
10893 return false;
10894 if (FLOAT_MODE_P (mode))
10896 rtx second_test = NULL, bypass_test = NULL;
10897 rtx compare_op, compare_seq;
10899 /* Shortcut: the following common codes never translate into carry flag compares. */
10900 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10901 || code == ORDERED || code == UNORDERED)
10902 return false;
10904 /* These comparisons require the zero flag; swap the operands so they won't. */
10905 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10906 && !TARGET_IEEE_FP)
10908 rtx tmp = op0;
10909 op0 = op1;
10910 op1 = tmp;
10911 code = swap_condition (code);
10914 /* Try to expand the comparison and verify that we end up with a carry-flag
10915 based comparison. This fails to be true only when we decide to expand the
10916 comparison using arithmetic, which is not a common scenario. */
10917 start_sequence ();
10918 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10919 &second_test, &bypass_test);
10920 compare_seq = get_insns ();
10921 end_sequence ();
10923 if (second_test || bypass_test)
10924 return false;
10925 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10926 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10927 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10928 else
10929 code = GET_CODE (compare_op);
10930 if (code != LTU && code != GEU)
10931 return false;
10932 emit_insn (compare_seq);
10933 *pop = compare_op;
10934 return true;
10936 if (!INTEGRAL_MODE_P (mode))
10937 return false;
10938 switch (code)
10940 case LTU:
10941 case GEU:
10942 break;
10944 /* Convert a==0 into (unsigned)a<1. */
10945 case EQ:
10946 case NE:
10947 if (op1 != const0_rtx)
10948 return false;
10949 op1 = const1_rtx;
10950 code = (code == EQ ? LTU : GEU);
10951 break;
10953 /* Convert a>b into b<a or a>=b-1. */
10954 case GTU:
10955 case LEU:
10956 if (GET_CODE (op1) == CONST_INT)
10958 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10959 /* Bail out on overflow. We can still swap the operands, but that
10960 would force loading of the constant into a register. */
10961 if (op1 == const0_rtx
10962 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10963 return false;
10964 code = (code == GTU ? GEU : LTU);
10966 else
10968 rtx tmp = op1;
10969 op1 = op0;
10970 op0 = tmp;
10971 code = (code == GTU ? LTU : GEU);
10973 break;
10975 /* Convert a>=0 into (unsigned)a<0x80000000. */
10976 case LT:
10977 case GE:
10978 if (mode == DImode || op1 != const0_rtx)
10979 return false;
10980 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10981 code = (code == LT ? GEU : LTU);
10982 break;
10983 case LE:
10984 case GT:
10985 if (mode == DImode || op1 != constm1_rtx)
10986 return false;
10987 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10988 code = (code == LE ? GEU : LTU);
10989 break;
10991 default:
10992 return false;
10994 /* Swapping operands may cause a constant to appear as the first operand. */
10995 if (!nonimmediate_operand (op0, VOIDmode))
10997 if (no_new_pseudos)
10998 return false;
10999 op0 = force_reg (mode, op0);
11001 ix86_compare_op0 = op0;
11002 ix86_compare_op1 = op1;
11003 *pop = ix86_expand_compare (code, NULL, NULL);
11004 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11005 return true;
11009 ix86_expand_int_movcc (rtx operands[])
11011 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11012 rtx compare_seq, compare_op;
11013 rtx second_test, bypass_test;
11014 enum machine_mode mode = GET_MODE (operands[0]);
11015 bool sign_bit_compare_p = false;
11017 start_sequence ();
11018 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11019 compare_seq = get_insns ();
11020 end_sequence ();
11022 compare_code = GET_CODE (compare_op);
11024 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11025 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11026 sign_bit_compare_p = true;
11028 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11029 HImode insns, we'd be swallowed in word prefix ops. */
11031 if ((mode != HImode || TARGET_FAST_PREFIX)
11032 && (mode != (TARGET_64BIT ? TImode : DImode))
11033 && GET_CODE (operands[2]) == CONST_INT
11034 && GET_CODE (operands[3]) == CONST_INT)
11036 rtx out = operands[0];
11037 HOST_WIDE_INT ct = INTVAL (operands[2]);
11038 HOST_WIDE_INT cf = INTVAL (operands[3]);
11039 HOST_WIDE_INT diff;
11041 diff = ct - cf;
11042 /* Sign bit compares are better done using shifts than by using
11043 sbb. */
11044 if (sign_bit_compare_p
11045 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11046 ix86_compare_op1, &compare_op))
11048 /* Detect overlap between destination and compare sources. */
11049 rtx tmp = out;
11051 if (!sign_bit_compare_p)
11053 bool fpcmp = false;
11055 compare_code = GET_CODE (compare_op);
11057 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11058 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11060 fpcmp = true;
11061 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11064 /* To simplify the rest of the code, restrict to the GEU case. */
11065 if (compare_code == LTU)
11067 HOST_WIDE_INT tmp = ct;
11068 ct = cf;
11069 cf = tmp;
11070 compare_code = reverse_condition (compare_code);
11071 code = reverse_condition (code);
11073 else
11075 if (fpcmp)
11076 PUT_CODE (compare_op,
11077 reverse_condition_maybe_unordered
11078 (GET_CODE (compare_op)));
11079 else
11080 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11082 diff = ct - cf;
11084 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11085 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11086 tmp = gen_reg_rtx (mode);
11088 if (mode == DImode)
11089 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11090 else
11091 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11093 else
11095 if (code == GT || code == GE)
11096 code = reverse_condition (code);
11097 else
11099 HOST_WIDE_INT tmp = ct;
11100 ct = cf;
11101 cf = tmp;
11102 diff = ct - cf;
11104 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11105 ix86_compare_op1, VOIDmode, 0, -1);
11108 if (diff == 1)
11111 * cmpl op0,op1
11112 * sbbl dest,dest
11113 * [addl dest, ct]
11115 * Size 5 - 8.
11117 if (ct)
11118 tmp = expand_simple_binop (mode, PLUS,
11119 tmp, GEN_INT (ct),
11120 copy_rtx (tmp), 1, OPTAB_DIRECT);
11122 else if (cf == -1)
11125 * cmpl op0,op1
11126 * sbbl dest,dest
11127 * orl $ct, dest
11129 * Size 8.
11131 tmp = expand_simple_binop (mode, IOR,
11132 tmp, GEN_INT (ct),
11133 copy_rtx (tmp), 1, OPTAB_DIRECT);
11135 else if (diff == -1 && ct)
11138 * cmpl op0,op1
11139 * sbbl dest,dest
11140 * notl dest
11141 * [addl dest, cf]
11143 * Size 8 - 11.
11145 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11146 if (cf)
11147 tmp = expand_simple_binop (mode, PLUS,
11148 copy_rtx (tmp), GEN_INT (cf),
11149 copy_rtx (tmp), 1, OPTAB_DIRECT);
11151 else
11154 * cmpl op0,op1
11155 * sbbl dest,dest
11156 * [notl dest]
11157 * andl cf - ct, dest
11158 * [addl dest, ct]
11160 * Size 8 - 11.
11163 if (cf == 0)
11165 cf = ct;
11166 ct = 0;
11167 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11170 tmp = expand_simple_binop (mode, AND,
11171 copy_rtx (tmp),
11172 gen_int_mode (cf - ct, mode),
11173 copy_rtx (tmp), 1, OPTAB_DIRECT);
11174 if (ct)
11175 tmp = expand_simple_binop (mode, PLUS,
11176 copy_rtx (tmp), GEN_INT (ct),
11177 copy_rtx (tmp), 1, OPTAB_DIRECT);
11180 if (!rtx_equal_p (tmp, out))
11181 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11183 return 1; /* DONE */
11186 if (diff < 0)
11188 HOST_WIDE_INT tmp;
11189 tmp = ct, ct = cf, cf = tmp;
11190 diff = -diff;
11191 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11193 /* We may be reversing an unordered compare to a normal compare, which
11194 is not valid in general (we may convert a non-trapping condition
11195 to a trapping one); however, on i386 we currently emit all
11196 comparisons unordered. */
11197 compare_code = reverse_condition_maybe_unordered (compare_code);
11198 code = reverse_condition_maybe_unordered (code);
11200 else
11202 compare_code = reverse_condition (compare_code);
11203 code = reverse_condition (code);
11207 compare_code = UNKNOWN;
11208 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11209 && GET_CODE (ix86_compare_op1) == CONST_INT)
11211 if (ix86_compare_op1 == const0_rtx
11212 && (code == LT || code == GE))
11213 compare_code = code;
11214 else if (ix86_compare_op1 == constm1_rtx)
11216 if (code == LE)
11217 compare_code = LT;
11218 else if (code == GT)
11219 compare_code = GE;
11223 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11224 if (compare_code != UNKNOWN
11225 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11226 && (cf == -1 || ct == -1))
11228 /* If lea code below could be used, only optimize
11229 if it results in a 2 insn sequence. */
11231 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11232 || diff == 3 || diff == 5 || diff == 9)
11233 || (compare_code == LT && ct == -1)
11234 || (compare_code == GE && cf == -1))
11237 * notl op1 (if necessary)
11238 * sarl $31, op1
11239 * orl cf, op1
11241 if (ct != -1)
11243 cf = ct;
11244 ct = -1;
11245 code = reverse_condition (code);
11248 out = emit_store_flag (out, code, ix86_compare_op0,
11249 ix86_compare_op1, VOIDmode, 0, -1);
11251 out = expand_simple_binop (mode, IOR,
11252 out, GEN_INT (cf),
11253 out, 1, OPTAB_DIRECT);
11254 if (out != operands[0])
11255 emit_move_insn (operands[0], out);
11257 return 1; /* DONE */
11262 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11263 || diff == 3 || diff == 5 || diff == 9)
11264 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11265 && (mode != DImode
11266 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11269 * xorl dest,dest
11270 * cmpl op1,op2
11271 * setcc dest
11272 * lea cf(dest*(ct-cf)),dest
11274 * Size 14.
11276 * This also catches the degenerate setcc-only case.
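 *
 *    E.g. (illustrative) for ct = 5, cf = 2 (so ct - cf = 3) this is
 *    roughly: setcc %al ; movzbl %al, %eax ; leal 2(%eax,%eax,2), %eax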
11279 rtx tmp;
11280 int nops;
11282 out = emit_store_flag (out, code, ix86_compare_op0,
11283 ix86_compare_op1, VOIDmode, 0, 1);
11285 nops = 0;
11286 /* On x86_64 the lea instruction operates on Pmode, so we need
11287 to get the arithmetic done in the proper mode to match. */
11288 if (diff == 1)
11289 tmp = copy_rtx (out);
11290 else
11292 rtx out1;
11293 out1 = copy_rtx (out);
11294 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11295 nops++;
11296 if (diff & 1)
11298 tmp = gen_rtx_PLUS (mode, tmp, out1);
11299 nops++;
11302 if (cf != 0)
11304 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11305 nops++;
11307 if (!rtx_equal_p (tmp, out))
11309 if (nops == 1)
11310 out = force_operand (tmp, copy_rtx (out));
11311 else
11312 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11314 if (!rtx_equal_p (out, operands[0]))
11315 emit_move_insn (operands[0], copy_rtx (out));
11317 return 1; /* DONE */
11321 * General case: Jumpful:
11322 * xorl dest,dest cmpl op1, op2
11323 * cmpl op1, op2 movl ct, dest
11324 * setcc dest jcc 1f
11325 * decl dest movl cf, dest
11326 * andl (cf-ct),dest 1:
11327 * addl ct,dest
11329 * Size 20. Size 14.
11331 * This is reasonably steep, but branch mispredict costs are
11332 * high on modern cpus, so consider failing only if optimizing
11333 * for space.
11336 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11337 && BRANCH_COST >= 2)
11339 if (cf == 0)
11341 cf = ct;
11342 ct = 0;
11343 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11344 /* We may be reversing an unordered compare to a normal compare,
11345 which is not valid in general (we may convert a non-trapping
11346 condition to a trapping one); however, on i386 we currently
11347 emit all comparisons unordered. */
11348 code = reverse_condition_maybe_unordered (code);
11349 else
11351 code = reverse_condition (code);
11352 if (compare_code != UNKNOWN)
11353 compare_code = reverse_condition (compare_code);
11357 if (compare_code != UNKNOWN)
11359 /* notl op1 (if needed)
11360 sarl $31, op1
11361 andl (cf-ct), op1
11362 addl ct, op1
11364 For x < 0 (resp. x <= -1) there will be no notl,
11365 so if possible swap the constants to get rid of the
11366 complement.
11367 True/false will be -1/0 while code below (store flag
11368 followed by decrement) is 0/-1, so the constants need
11369 to be exchanged once more. */
11371 if (compare_code == GE || !cf)
11373 code = reverse_condition (code);
11374 compare_code = LT;
11376 else
11378 HOST_WIDE_INT tmp = cf;
11379 cf = ct;
11380 ct = tmp;
11383 out = emit_store_flag (out, code, ix86_compare_op0,
11384 ix86_compare_op1, VOIDmode, 0, -1);
11386 else
11388 out = emit_store_flag (out, code, ix86_compare_op0,
11389 ix86_compare_op1, VOIDmode, 0, 1);
11391 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11392 copy_rtx (out), 1, OPTAB_DIRECT);
11395 out = expand_simple_binop (mode, AND, copy_rtx (out),
11396 gen_int_mode (cf - ct, mode),
11397 copy_rtx (out), 1, OPTAB_DIRECT);
11398 if (ct)
11399 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11400 copy_rtx (out), 1, OPTAB_DIRECT);
11401 if (!rtx_equal_p (out, operands[0]))
11402 emit_move_insn (operands[0], copy_rtx (out));
11404 return 1; /* DONE */
11408 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11410 /* Try a few more things with specific constants and a variable. */
11412 optab op;
11413 rtx var, orig_out, out, tmp;
11415 if (BRANCH_COST <= 2)
11416 return 0; /* FAIL */
11418 /* If one of the two operands is an interesting constant, load a
11419 constant with the above and mask it in with a logical operation. */
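/* Illustrative example: for dest = cond ? var : 0 we recurse to compute
   tmp = cond ? -1 : 0 and then emit dest = var & tmp; similarly,
   dest = cond ? var : -1 recurses for tmp = cond ? 0 : -1 and ORs VAR in.  */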
11421 if (GET_CODE (operands[2]) == CONST_INT)
11423 var = operands[3];
11424 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11425 operands[3] = constm1_rtx, op = and_optab;
11426 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11427 operands[3] = const0_rtx, op = ior_optab;
11428 else
11429 return 0; /* FAIL */
11431 else if (GET_CODE (operands[3]) == CONST_INT)
11433 var = operands[2];
11434 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11435 operands[2] = constm1_rtx, op = and_optab;
11436 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11437 operands[2] = const0_rtx, op = ior_optab;
11438 else
11439 return 0; /* FAIL */
11441 else
11442 return 0; /* FAIL */
11444 orig_out = operands[0];
11445 tmp = gen_reg_rtx (mode);
11446 operands[0] = tmp;
11448 /* Recurse to get the constant loaded. */
11449 if (ix86_expand_int_movcc (operands) == 0)
11450 return 0; /* FAIL */
11452 /* Mask in the interesting variable. */
11453 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11454 OPTAB_WIDEN);
11455 if (!rtx_equal_p (out, orig_out))
11456 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11458 return 1; /* DONE */
11462 * For comparison with above,
11464 * movl cf,dest
11465 * movl ct,tmp
11466 * cmpl op1,op2
11467 * cmovcc tmp,dest
11469 * Size 15.
11470 */
11472 if (! nonimmediate_operand (operands[2], mode))
11473 operands[2] = force_reg (mode, operands[2]);
11474 if (! nonimmediate_operand (operands[3], mode))
11475 operands[3] = force_reg (mode, operands[3]);
11477 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11479 rtx tmp = gen_reg_rtx (mode);
11480 emit_move_insn (tmp, operands[3]);
11481 operands[3] = tmp;
11483 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11485 rtx tmp = gen_reg_rtx (mode);
11486 emit_move_insn (tmp, operands[2]);
11487 operands[2] = tmp;
11490 if (! register_operand (operands[2], VOIDmode)
11491 && (mode == QImode
11492 || ! register_operand (operands[3], VOIDmode)))
11493 operands[2] = force_reg (mode, operands[2]);
11495 if (mode == QImode
11496 && ! register_operand (operands[3], VOIDmode))
11497 operands[3] = force_reg (mode, operands[3]);
11499 emit_insn (compare_seq);
11500 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11501 gen_rtx_IF_THEN_ELSE (mode,
11502 compare_op, operands[2],
11503 operands[3])));
11504 if (bypass_test)
11505 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11506 gen_rtx_IF_THEN_ELSE (mode,
11507 bypass_test,
11508 copy_rtx (operands[3]),
11509 copy_rtx (operands[0]))));
11510 if (second_test)
11511 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11512 gen_rtx_IF_THEN_ELSE (mode,
11513 second_test,
11514 copy_rtx (operands[2]),
11515 copy_rtx (operands[0]))));
11517 return 1; /* DONE */
11520 /* Swap, force into registers, or otherwise massage the two operands
11521 to an sse comparison with a mask result. Thus we differ a bit from
11522 ix86_prepare_fp_compare_args which expects to produce a flags result.
11524 The DEST operand exists to help determine whether to commute commutative
11525 operators. The POP0/POP1 operands are updated in place. The new
11526 comparison code is returned, or UNKNOWN if not implementable. */
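/* For example, GT (op0, op1) becomes LT (op1, op0) and GE (op0, op1)
   becomes LE (op1, op0), since only LT/LE/UNGT/UNGE are supported
   directly by the SSE compare instructions.  */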
11528 static enum rtx_code
11529 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11530 rtx *pop0, rtx *pop1)
11532 rtx tmp;
11534 switch (code)
11536 case LTGT:
11537 case UNEQ:
11538 /* We have no LTGT as an operator. We could implement it with
11539 NE & ORDERED, but this requires an extra temporary. It's
11540 not clear that it's worth it. */
11541 return UNKNOWN;
11543 case LT:
11544 case LE:
11545 case UNGT:
11546 case UNGE:
11547 /* These are supported directly. */
11548 break;
11550 case EQ:
11551 case NE:
11552 case UNORDERED:
11553 case ORDERED:
11554 /* For commutative operators, try to canonicalize the destination
11555 operand to be first in the comparison - this helps reload to
11556 avoid extra moves. */
11557 if (!dest || !rtx_equal_p (dest, *pop1))
11558 break;
11559 /* FALLTHRU */
11561 case GE:
11562 case GT:
11563 case UNLE:
11564 case UNLT:
11565 /* These are not supported directly. Swap the comparison operands
11566 to transform into something that is supported. */
11567 tmp = *pop0;
11568 *pop0 = *pop1;
11569 *pop1 = tmp;
11570 code = swap_condition (code);
11571 break;
11573 default:
11574 gcc_unreachable ();
11577 return code;
11580 /* Detect conditional moves that exactly match min/max operational
11581 semantics. Note that this is IEEE safe, as long as we don't
11582 interchange the operands.
11584 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11585 and TRUE if the operation is successful and instructions are emitted. */
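/* Note that the SSE min/max instructions are not commutative: when the
   operands are unordered, or are both zero, they return the second
   source operand, so the operand order chosen here determines the
   result for NaNs and for -0.0 vs +0.0.  */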
11587 static bool
11588 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11589 rtx cmp_op1, rtx if_true, rtx if_false)
11591 enum machine_mode mode;
11592 bool is_min;
11593 rtx tmp;
11595 if (code == LT)
11597 else if (code == UNGE)
11599 tmp = if_true;
11600 if_true = if_false;
11601 if_false = tmp;
11603 else
11604 return false;
11606 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11607 is_min = true;
11608 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11609 is_min = false;
11610 else
11611 return false;
11613 mode = GET_MODE (dest);
11615 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11616 but MODE may be a vector mode and thus not appropriate. */
11617 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11619 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11620 rtvec v;
11622 if_true = force_reg (mode, if_true);
11623 v = gen_rtvec (2, if_true, if_false);
11624 tmp = gen_rtx_UNSPEC (mode, v, u);
11626 else
11628 code = is_min ? SMIN : SMAX;
11629 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11632 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11633 return true;
11636 /* Expand an sse vector comparison. Return the register with the result. */
11638 static rtx
11639 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11640 rtx op_true, rtx op_false)
11642 enum machine_mode mode = GET_MODE (dest);
11643 rtx x;
11645 cmp_op0 = force_reg (mode, cmp_op0);
11646 if (!nonimmediate_operand (cmp_op1, mode))
11647 cmp_op1 = force_reg (mode, cmp_op1);
11649 if (optimize
11650 || reg_overlap_mentioned_p (dest, op_true)
11651 || reg_overlap_mentioned_p (dest, op_false))
11652 dest = gen_reg_rtx (mode);
11654 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11655 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11657 return dest;
11660 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11661 operations. This is used for both scalar and vector conditional moves. */
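/* The general case below computes DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE);
   when one of the arms is the zero vector a single AND (or AND-NOT) suffices.  */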
11663 static void
11664 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11666 enum machine_mode mode = GET_MODE (dest);
11667 rtx t2, t3, x;
11669 if (op_false == CONST0_RTX (mode))
11671 op_true = force_reg (mode, op_true);
11672 x = gen_rtx_AND (mode, cmp, op_true);
11673 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11675 else if (op_true == CONST0_RTX (mode))
11677 op_false = force_reg (mode, op_false);
11678 x = gen_rtx_NOT (mode, cmp);
11679 x = gen_rtx_AND (mode, x, op_false);
11680 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11682 else
11684 op_true = force_reg (mode, op_true);
11685 op_false = force_reg (mode, op_false);
11687 t2 = gen_reg_rtx (mode);
11688 if (optimize)
11689 t3 = gen_reg_rtx (mode);
11690 else
11691 t3 = dest;
11693 x = gen_rtx_AND (mode, op_true, cmp);
11694 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11696 x = gen_rtx_NOT (mode, cmp);
11697 x = gen_rtx_AND (mode, x, op_false);
11698 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11700 x = gen_rtx_IOR (mode, t3, t2);
11701 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11705 /* Expand a floating-point conditional move. Return true if successful. */
11707 int
11708 ix86_expand_fp_movcc (rtx operands[])
11710 enum machine_mode mode = GET_MODE (operands[0]);
11711 enum rtx_code code = GET_CODE (operands[1]);
11712 rtx tmp, compare_op, second_test, bypass_test;
11714 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11716 enum machine_mode cmode;
11718 /* Since we've no cmove for sse registers, don't force bad register
11719 allocation just to gain access to it. Deny movcc when the
11720 comparison mode doesn't match the move mode. */
11721 cmode = GET_MODE (ix86_compare_op0);
11722 if (cmode == VOIDmode)
11723 cmode = GET_MODE (ix86_compare_op1);
11724 if (cmode != mode)
11725 return 0;
11727 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11728 &ix86_compare_op0,
11729 &ix86_compare_op1);
11730 if (code == UNKNOWN)
11731 return 0;
11733 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11734 ix86_compare_op1, operands[2],
11735 operands[3]))
11736 return 1;
11738 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11739 ix86_compare_op1, operands[2], operands[3]);
11740 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11741 return 1;
11747 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11749 /* The floating point conditional move instructions don't directly
11750 support signed integer comparisons. */
11752 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11754 gcc_assert (!second_test && !bypass_test);
11755 tmp = gen_reg_rtx (QImode);
11756 ix86_expand_setcc (code, tmp);
11757 code = NE;
11758 ix86_compare_op0 = tmp;
11759 ix86_compare_op1 = const0_rtx;
11760 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11762 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11764 tmp = gen_reg_rtx (mode);
11765 emit_move_insn (tmp, operands[3]);
11766 operands[3] = tmp;
11768 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11770 tmp = gen_reg_rtx (mode);
11771 emit_move_insn (tmp, operands[2]);
11772 operands[2] = tmp;
11775 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11776 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11777 operands[2], operands[3])));
11778 if (bypass_test)
11779 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11780 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11781 operands[3], operands[0])));
11782 if (second_test)
11783 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11784 gen_rtx_IF_THEN_ELSE (mode, second_test,
11785 operands[2], operands[0])));
11787 return 1;
11790 /* Expand a floating-point vector conditional move; a vcond operation
11791 rather than a movcc operation. */
11793 bool
11794 ix86_expand_fp_vcond (rtx operands[])
11796 enum rtx_code code = GET_CODE (operands[3]);
11797 rtx cmp;
11799 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11800 &operands[4], &operands[5]);
11801 if (code == UNKNOWN)
11802 return false;
11804 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11805 operands[5], operands[1], operands[2]))
11806 return true;
11808 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11809 operands[1], operands[2]);
11810 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11811 return true;
11814 /* Expand a signed integral vector conditional move. */
11816 bool
11817 ix86_expand_int_vcond (rtx operands[])
11819 enum machine_mode mode = GET_MODE (operands[0]);
11820 enum rtx_code code = GET_CODE (operands[3]);
11821 bool negate = false;
11822 rtx x, cop0, cop1;
11824 cop0 = operands[4];
11825 cop1 = operands[5];
11827 /* Canonicalize the comparison to EQ, GT, GTU. */
11828 switch (code)
11830 case EQ:
11831 case GT:
11832 case GTU:
11833 break;
11835 case NE:
11836 case LE:
11837 case LEU:
11838 code = reverse_condition (code);
11839 negate = true;
11840 break;
11842 case GE:
11843 case GEU:
11844 code = reverse_condition (code);
11845 negate = true;
11846 /* FALLTHRU */
11848 case LT:
11849 case LTU:
11850 code = swap_condition (code);
11851 x = cop0, cop0 = cop1, cop1 = x;
11852 break;
11854 default:
11855 gcc_unreachable ();
11858 /* Unsigned parallel compare is not supported by the hardware.  Play some
11859 tricks to turn this into a signed comparison against 0.  */
11860 if (code == GTU)
11862 cop0 = force_reg (mode, cop0);
11864 switch (mode)
11866 case V4SImode:
11868 rtx t1, t2, mask;
11870 /* Flip the sign bit of both operands: X >u Y is equivalent
11871 to (X ^ sign) >s (Y ^ sign), and pcmpgtd handles the signed
11872 comparison directly.  */
11874 mask = GEN_INT (-0x80000000);
11875 mask = gen_rtx_CONST_VECTOR (mode,
11876 gen_rtvec (4, mask, mask, mask, mask));
11877 mask = force_reg (mode, mask);
11879 t1 = gen_reg_rtx (mode);
11880 emit_insn (gen_xorv4si3 (t1, cop0, mask));
11881 t2 = gen_reg_rtx (mode);
11882 emit_insn (gen_xorv4si3 (t2, force_reg (mode, cop1), mask));
11884 cop0 = t1;
11885 cop1 = t2;
11887 code = GT;
11889 break;
11891 case V16QImode:
11892 case V8HImode:
11893 /* Perform a parallel unsigned saturating subtraction;
11894 X >u Y exactly when X -us Y is nonzero.  */
11895 x = gen_reg_rtx (mode);
11896 emit_insn (gen_rtx_SET (VOIDmode, x,
11897 gen_rtx_US_MINUS (mode, cop0, cop1)));
11898 cop0 = x;
11899 cop1 = CONST0_RTX (mode);
11900 code = EQ;
11901 negate = !negate;
11902 break;
11904 default:
11905 gcc_unreachable ();
11910 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11911 operands[1+negate], operands[2-negate]);
11913 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11914 operands[2-negate]);
11915 return true;
11918 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11919 true if we should do zero extension, else sign extension. HIGH_P is
11920 true if we want the N/2 high elements, else the low elements. */
11922 void
11923 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11925 enum machine_mode imode = GET_MODE (operands[1]);
11926 rtx (*unpack)(rtx, rtx, rtx);
11927 rtx se, dest;
11929 switch (imode)
11931 case V16QImode:
11932 if (high_p)
11933 unpack = gen_vec_interleave_highv16qi;
11934 else
11935 unpack = gen_vec_interleave_lowv16qi;
11936 break;
11937 case V8HImode:
11938 if (high_p)
11939 unpack = gen_vec_interleave_highv8hi;
11940 else
11941 unpack = gen_vec_interleave_lowv8hi;
11942 break;
11943 case V4SImode:
11944 if (high_p)
11945 unpack = gen_vec_interleave_highv4si;
11946 else
11947 unpack = gen_vec_interleave_lowv4si;
11948 break;
11949 default:
11950 gcc_unreachable ();
11953 dest = gen_lowpart (imode, operands[0]);
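/* The interleave supplies the extension bits: a zero vector for zero
   extension, or the result of the signed comparison 0 > OP (all ones in
   exactly the negative elements) for sign extension.  */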
11955 if (unsigned_p)
11956 se = force_reg (imode, CONST0_RTX (imode));
11957 else
11958 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11959 operands[1], pc_rtx, pc_rtx);
11961 emit_insn (unpack (dest, operands[1], se));
11964 /* Expand conditional increment or decrement using adc/sbb instructions.
11965 The default case using setcc followed by the conditional move can be
11966 done by generic code. */
11967 int
11968 ix86_expand_int_addcc (rtx operands[])
11970 enum rtx_code code = GET_CODE (operands[1]);
11971 rtx compare_op;
11972 rtx val = const0_rtx;
11973 bool fpcmp = false;
11974 enum machine_mode mode = GET_MODE (operands[0]);
11976 if (operands[3] != const1_rtx
11977 && operands[3] != constm1_rtx)
11978 return 0;
11979 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11980 ix86_compare_op1, &compare_op))
11981 return 0;
11982 code = GET_CODE (compare_op);
11984 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11985 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11987 fpcmp = true;
11988 code = ix86_fp_compare_code_to_integer (code);
11991 if (code != LTU)
11993 val = constm1_rtx;
11994 if (fpcmp)
11995 PUT_CODE (compare_op,
11996 reverse_condition_maybe_unordered
11997 (GET_CODE (compare_op)));
11998 else
11999 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12001 PUT_MODE (compare_op, mode);
12003 /* Construct either adc or sbb insn. */
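/* The sbb form computes operands[2] - VAL - CF and the adc form
   operands[2] + VAL + CF; VAL and the sense of the carry were chosen
   above so that both reduce to operands[2] +/- 1 exactly when the
   condition holds.  For example, an unsigned "y += (a < b)" becomes a
   compare followed by adc $0, y.  */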
12004 if ((code == LTU) == (operands[3] == constm1_rtx))
12006 switch (GET_MODE (operands[0]))
12008 case QImode:
12009 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12010 break;
12011 case HImode:
12012 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12013 break;
12014 case SImode:
12015 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12016 break;
12017 case DImode:
12018 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12019 break;
12020 default:
12021 gcc_unreachable ();
12024 else
12026 switch (GET_MODE (operands[0]))
12028 case QImode:
12029 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12030 break;
12031 case HImode:
12032 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12033 break;
12034 case SImode:
12035 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12036 break;
12037 case DImode:
12038 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12039 break;
12040 default:
12041 gcc_unreachable ();
12044 return 1; /* DONE */
12048 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12049 works for floating point parameters and non-offsettable memories.
12050 For pushes, it returns just stack offsets; the values will be saved
12051 in the right order. At most three parts are generated. */
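/* On a 32-bit target DImode and DFmode are returned as two SImode parts
   and XFmode as three; on a 64-bit target TImode, XFmode and TFmode are
   returned as two parts.  */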
12053 static int
12054 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12056 int size;
12058 if (!TARGET_64BIT)
12059 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12060 else
12061 size = (GET_MODE_SIZE (mode) + 4) / 8;
12063 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12064 gcc_assert (size >= 2 && size <= 3);
12066 /* Optimize constant pool references into immediates. This is used by fp
12067 moves, which force all constants to memory to allow combining. */
12068 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12070 rtx tmp = maybe_get_pool_constant (operand);
12071 if (tmp)
12072 operand = tmp;
12075 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12077 /* The only non-offsettable memories we handle are pushes. */
12078 int ok = push_operand (operand, VOIDmode);
12080 gcc_assert (ok);
12082 operand = copy_rtx (operand);
12083 PUT_MODE (operand, Pmode);
12084 parts[0] = parts[1] = parts[2] = operand;
12085 return size;
12088 if (GET_CODE (operand) == CONST_VECTOR)
12090 enum machine_mode imode = int_mode_for_mode (mode);
12091 /* Caution: if we looked through a constant pool memory above,
12092 the operand may actually have a different mode now. That's
12093 ok, since we want to pun this all the way back to an integer. */
12094 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12095 gcc_assert (operand != NULL);
12096 mode = imode;
12099 if (!TARGET_64BIT)
12101 if (mode == DImode)
12102 split_di (&operand, 1, &parts[0], &parts[1]);
12103 else
12105 if (REG_P (operand))
12107 gcc_assert (reload_completed);
12108 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12109 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12110 if (size == 3)
12111 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12113 else if (offsettable_memref_p (operand))
12115 operand = adjust_address (operand, SImode, 0);
12116 parts[0] = operand;
12117 parts[1] = adjust_address (operand, SImode, 4);
12118 if (size == 3)
12119 parts[2] = adjust_address (operand, SImode, 8);
12121 else if (GET_CODE (operand) == CONST_DOUBLE)
12123 REAL_VALUE_TYPE r;
12124 long l[4];
12126 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12127 switch (mode)
12129 case XFmode:
12130 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12131 parts[2] = gen_int_mode (l[2], SImode);
12132 break;
12133 case DFmode:
12134 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12135 break;
12136 default:
12137 gcc_unreachable ();
12139 parts[1] = gen_int_mode (l[1], SImode);
12140 parts[0] = gen_int_mode (l[0], SImode);
12142 else
12143 gcc_unreachable ();
12146 else
12148 if (mode == TImode)
12149 split_ti (&operand, 1, &parts[0], &parts[1]);
12150 if (mode == XFmode || mode == TFmode)
12152 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12153 if (REG_P (operand))
12155 gcc_assert (reload_completed);
12156 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12157 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12159 else if (offsettable_memref_p (operand))
12161 operand = adjust_address (operand, DImode, 0);
12162 parts[0] = operand;
12163 parts[1] = adjust_address (operand, upper_mode, 8);
12165 else if (GET_CODE (operand) == CONST_DOUBLE)
12167 REAL_VALUE_TYPE r;
12168 long l[4];
12170 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12171 real_to_target (l, &r, mode);
12173 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12174 if (HOST_BITS_PER_WIDE_INT >= 64)
12175 parts[0]
12176 = gen_int_mode
12177 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12178 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12179 DImode);
12180 else
12181 parts[0] = immed_double_const (l[0], l[1], DImode);
12183 if (upper_mode == SImode)
12184 parts[1] = gen_int_mode (l[2], SImode);
12185 else if (HOST_BITS_PER_WIDE_INT >= 64)
12186 parts[1]
12187 = gen_int_mode
12188 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12189 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12190 DImode);
12191 else
12192 parts[1] = immed_double_const (l[2], l[3], DImode);
12194 else
12195 gcc_unreachable ();
12199 return size;
12202 /* Emit insns to perform a move or push of DI, DF, and XF values.
12203 Operands 2-4 receive the destination parts in the correct order;
12204 operands 5-7 receive the source parts. */
12207 void
12208 ix86_split_long_move (rtx operands[])
12210 rtx part[2][3];
12211 int nparts;
12212 int push = 0;
12213 int collisions = 0;
12214 enum machine_mode mode = GET_MODE (operands[0]);
12216 /* The DFmode expanders may ask us to move a double.
12217 For a 64-bit target this is a single move. By hiding the fact
12218 here we simplify the i386.md splitters. */
12219 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12221 /* Optimize constant pool references into immediates. This is used by
12222 fp moves, which force all constants to memory to allow combining. */
12224 if (GET_CODE (operands[1]) == MEM
12225 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12226 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12227 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12228 if (push_operand (operands[0], VOIDmode))
12230 operands[0] = copy_rtx (operands[0]);
12231 PUT_MODE (operands[0], Pmode);
12233 else
12234 operands[0] = gen_lowpart (DImode, operands[0]);
12235 operands[1] = gen_lowpart (DImode, operands[1]);
12236 emit_move_insn (operands[0], operands[1]);
12237 return;
12240 /* The only non-offsettable memory we handle is push. */
12241 if (push_operand (operands[0], VOIDmode))
12242 push = 1;
12243 else
12244 gcc_assert (GET_CODE (operands[0]) != MEM
12245 || offsettable_memref_p (operands[0]));
12247 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12248 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12250 /* When emitting a push, watch out for source operands on the stack. */
12251 if (push && GET_CODE (operands[1]) == MEM
12252 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12254 if (nparts == 3)
12255 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12256 XEXP (part[1][2], 0));
12257 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12258 XEXP (part[1][1], 0));
12261 /* We need to do the copy in the right order in case an address register
12262 of the source overlaps the destination. */
12263 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12265 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12266 collisions++;
12267 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12268 collisions++;
12269 if (nparts == 3
12270 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12271 collisions++;
12273 /* Collision in the middle part can be handled by reordering. */
12274 if (collisions == 1 && nparts == 3
12275 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12277 rtx tmp;
12278 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12279 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12282 /* If there are more collisions, we can't handle it by reordering.
12283 Do an lea to the last part and use only one colliding move. */
12284 else if (collisions > 1)
12286 rtx base;
12288 collisions = 1;
12290 base = part[0][nparts - 1];
12292 /* Handle the case when the last part isn't valid for lea.
12293 Happens in 64-bit mode storing the 12-byte XFmode. */
12294 if (GET_MODE (base) != Pmode)
12295 base = gen_rtx_REG (Pmode, REGNO (base));
12297 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12298 part[1][0] = replace_equiv_address (part[1][0], base);
12299 part[1][1] = replace_equiv_address (part[1][1],
12300 plus_constant (base, UNITS_PER_WORD));
12301 if (nparts == 3)
12302 part[1][2] = replace_equiv_address (part[1][2],
12303 plus_constant (base, 8));
12307 if (push)
12309 if (!TARGET_64BIT)
12311 if (nparts == 3)
12313 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12314 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12315 emit_move_insn (part[0][2], part[1][2]);
12318 else
12320 /* In 64-bit mode we don't have a 32-bit push available. If the part is
12321 a register, that is OK - we just use the larger counterpart. We also
12322 retype memory - this comes from an attempt to avoid a REX prefix on
12323 the move of the second half of a TFmode value. */
12324 if (GET_MODE (part[1][1]) == SImode)
12326 switch (GET_CODE (part[1][1]))
12328 case MEM:
12329 part[1][1] = adjust_address (part[1][1], DImode, 0);
12330 break;
12332 case REG:
12333 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12334 break;
12336 default:
12337 gcc_unreachable ();
12340 if (GET_MODE (part[1][0]) == SImode)
12341 part[1][0] = part[1][1];
12344 emit_move_insn (part[0][1], part[1][1]);
12345 emit_move_insn (part[0][0], part[1][0]);
12346 return;
12349 /* Choose correct order to not overwrite the source before it is copied. */
12350 if ((REG_P (part[0][0])
12351 && REG_P (part[1][1])
12352 && (REGNO (part[0][0]) == REGNO (part[1][1])
12353 || (nparts == 3
12354 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12355 || (collisions > 0
12356 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12358 if (nparts == 3)
12360 operands[2] = part[0][2];
12361 operands[3] = part[0][1];
12362 operands[4] = part[0][0];
12363 operands[5] = part[1][2];
12364 operands[6] = part[1][1];
12365 operands[7] = part[1][0];
12367 else
12369 operands[2] = part[0][1];
12370 operands[3] = part[0][0];
12371 operands[5] = part[1][1];
12372 operands[6] = part[1][0];
12375 else
12377 if (nparts == 3)
12379 operands[2] = part[0][0];
12380 operands[3] = part[0][1];
12381 operands[4] = part[0][2];
12382 operands[5] = part[1][0];
12383 operands[6] = part[1][1];
12384 operands[7] = part[1][2];
12386 else
12388 operands[2] = part[0][0];
12389 operands[3] = part[0][1];
12390 operands[5] = part[1][0];
12391 operands[6] = part[1][1];
12395 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12396 if (optimize_size)
12398 if (GET_CODE (operands[5]) == CONST_INT
12399 && operands[5] != const0_rtx
12400 && REG_P (operands[2]))
12402 if (GET_CODE (operands[6]) == CONST_INT
12403 && INTVAL (operands[6]) == INTVAL (operands[5]))
12404 operands[6] = operands[2];
12406 if (nparts == 3
12407 && GET_CODE (operands[7]) == CONST_INT
12408 && INTVAL (operands[7]) == INTVAL (operands[5]))
12409 operands[7] = operands[2];
12412 if (nparts == 3
12413 && GET_CODE (operands[6]) == CONST_INT
12414 && operands[6] != const0_rtx
12415 && REG_P (operands[3])
12416 && GET_CODE (operands[7]) == CONST_INT
12417 && INTVAL (operands[7]) == INTVAL (operands[6]))
12418 operands[7] = operands[3];
12421 emit_move_insn (operands[2], operands[5]);
12422 emit_move_insn (operands[3], operands[6]);
12423 if (nparts == 3)
12424 emit_move_insn (operands[4], operands[7]);
12426 return;
12429 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12430 left shift by a constant, either using a single shift or
12431 a sequence of add instructions. */
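/* Adding a register to itself shifts it left by one, so a run of COUNT
   additions implements OPERAND <<= COUNT; that form is used when we are
   not optimizing for size and COUNT adds cost no more than one shift by
   a constant on the processor being tuned for.  */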
12433 static void
12434 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12436 if (count == 1)
12438 emit_insn ((mode == DImode
12439 ? gen_addsi3
12440 : gen_adddi3) (operand, operand, operand));
12442 else if (!optimize_size
12443 && count * ix86_cost->add <= ix86_cost->shift_const)
12445 int i;
12446 for (i=0; i<count; i++)
12448 emit_insn ((mode == DImode
12449 ? gen_addsi3
12450 : gen_adddi3) (operand, operand, operand));
12453 else
12454 emit_insn ((mode == DImode
12455 ? gen_ashlsi3
12456 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12459 void
12460 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12462 rtx low[2], high[2];
12463 int count;
12464 const int single_width = mode == DImode ? 32 : 64;
12466 if (GET_CODE (operands[2]) == CONST_INT)
12468 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12469 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12471 if (count >= single_width)
12473 emit_move_insn (high[0], low[1]);
12474 emit_move_insn (low[0], const0_rtx);
12476 if (count > single_width)
12477 ix86_expand_ashl_const (high[0], count - single_width, mode);
12479 else
12481 if (!rtx_equal_p (operands[0], operands[1]))
12482 emit_move_insn (operands[0], operands[1]);
12483 emit_insn ((mode == DImode
12484 ? gen_x86_shld_1
12485 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12486 ix86_expand_ashl_const (low[0], count, mode);
12488 return;
12491 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12493 if (operands[1] == const1_rtx)
12495 /* Assuming we've chosen QImode-capable registers, then 1 << N
12496 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12497 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12499 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12501 ix86_expand_clear (low[0]);
12502 ix86_expand_clear (high[0]);
12503 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12505 d = gen_lowpart (QImode, low[0]);
12506 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12507 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12508 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12510 d = gen_lowpart (QImode, high[0]);
12511 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12512 s = gen_rtx_NE (QImode, flags, const0_rtx);
12513 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12516 /* Otherwise, we can get the same results by manually performing
12517 a bit extract operation on bit 5/6, and then performing the two
12518 shifts. The two methods of getting 0/1 into low/high are exactly
12519 the same size. Avoiding the shift in the bit extract case helps
12520 pentium4 a bit; no one else seems to care much either way. */
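/* That is, for a DImode 1 << N: HIGH = (N >> 5) & 1 and LOW = HIGH ^ 1,
   after which both halves are shifted left by N (the hardware only uses
   the low five bits of the count for a 32-bit shift).  */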
12521 else
12523 rtx x;
12525 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12526 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12527 else
12528 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12529 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12531 emit_insn ((mode == DImode
12532 ? gen_lshrsi3
12533 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12534 emit_insn ((mode == DImode
12535 ? gen_andsi3
12536 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12537 emit_move_insn (low[0], high[0]);
12538 emit_insn ((mode == DImode
12539 ? gen_xorsi3
12540 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12543 emit_insn ((mode == DImode
12544 ? gen_ashlsi3
12545 : gen_ashldi3) (low[0], low[0], operands[2]));
12546 emit_insn ((mode == DImode
12547 ? gen_ashlsi3
12548 : gen_ashldi3) (high[0], high[0], operands[2]));
12549 return;
12552 if (operands[1] == constm1_rtx)
12554 /* For -1 << N, we can avoid the shld instruction, because we
12555 know that we're shifting 0...31/63 ones into a -1. */
12556 emit_move_insn (low[0], constm1_rtx);
12557 if (optimize_size)
12558 emit_move_insn (high[0], low[0]);
12559 else
12560 emit_move_insn (high[0], constm1_rtx);
12562 else
12564 if (!rtx_equal_p (operands[0], operands[1]))
12565 emit_move_insn (operands[0], operands[1]);
12567 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12568 emit_insn ((mode == DImode
12569 ? gen_x86_shld_1
12570 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12573 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12575 if (TARGET_CMOVE && scratch)
12577 ix86_expand_clear (scratch);
12578 emit_insn ((mode == DImode
12579 ? gen_x86_shift_adj_1
12580 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12582 else
12583 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12586 void
12587 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12589 rtx low[2], high[2];
12590 int count;
12591 const int single_width = mode == DImode ? 32 : 64;
12593 if (GET_CODE (operands[2]) == CONST_INT)
12595 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12596 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12598 if (count == single_width * 2 - 1)
12600 emit_move_insn (high[0], high[1]);
12601 emit_insn ((mode == DImode
12602 ? gen_ashrsi3
12603 : gen_ashrdi3) (high[0], high[0],
12604 GEN_INT (single_width - 1)));
12605 emit_move_insn (low[0], high[0]);
12608 else if (count >= single_width)
12610 emit_move_insn (low[0], high[1]);
12611 emit_move_insn (high[0], low[0]);
12612 emit_insn ((mode == DImode
12613 ? gen_ashrsi3
12614 : gen_ashrdi3) (high[0], high[0],
12615 GEN_INT (single_width - 1)));
12616 if (count > single_width)
12617 emit_insn ((mode == DImode
12618 ? gen_ashrsi3
12619 : gen_ashrdi3) (low[0], low[0],
12620 GEN_INT (count - single_width)));
12622 else
12624 if (!rtx_equal_p (operands[0], operands[1]))
12625 emit_move_insn (operands[0], operands[1]);
12626 emit_insn ((mode == DImode
12627 ? gen_x86_shrd_1
12628 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12629 emit_insn ((mode == DImode
12630 ? gen_ashrsi3
12631 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12634 else
12636 if (!rtx_equal_p (operands[0], operands[1]))
12637 emit_move_insn (operands[0], operands[1]);
12639 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12641 emit_insn ((mode == DImode
12642 ? gen_x86_shrd_1
12643 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12644 emit_insn ((mode == DImode
12645 ? gen_ashrsi3
12646 : gen_ashrdi3) (high[0], high[0], operands[2]));
12648 if (TARGET_CMOVE && scratch)
12650 emit_move_insn (scratch, high[0]);
12651 emit_insn ((mode == DImode
12652 ? gen_ashrsi3
12653 : gen_ashrdi3) (scratch, scratch,
12654 GEN_INT (single_width - 1)));
12655 emit_insn ((mode == DImode
12656 ? gen_x86_shift_adj_1
12657 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12658 scratch));
12660 else
12661 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12665 void
12666 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12668 rtx low[2], high[2];
12669 int count;
12670 const int single_width = mode == DImode ? 32 : 64;
12672 if (GET_CODE (operands[2]) == CONST_INT)
12674 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12675 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12677 if (count >= single_width)
12679 emit_move_insn (low[0], high[1]);
12680 ix86_expand_clear (high[0]);
12682 if (count > single_width)
12683 emit_insn ((mode == DImode
12684 ? gen_lshrsi3
12685 : gen_lshrdi3) (low[0], low[0],
12686 GEN_INT (count - single_width)));
12688 else
12690 if (!rtx_equal_p (operands[0], operands[1]))
12691 emit_move_insn (operands[0], operands[1]);
12692 emit_insn ((mode == DImode
12693 ? gen_x86_shrd_1
12694 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12695 emit_insn ((mode == DImode
12696 ? gen_lshrsi3
12697 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12700 else
12702 if (!rtx_equal_p (operands[0], operands[1]))
12703 emit_move_insn (operands[0], operands[1]);
12705 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12707 emit_insn ((mode == DImode
12708 ? gen_x86_shrd_1
12709 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12710 emit_insn ((mode == DImode
12711 ? gen_lshrsi3
12712 : gen_lshrdi3) (high[0], high[0], operands[2]));
12714 /* Heh. By reversing the arguments, we can reuse this pattern. */
12715 if (TARGET_CMOVE && scratch)
12717 ix86_expand_clear (scratch);
12718 emit_insn ((mode == DImode
12719 ? gen_x86_shift_adj_1
12720 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12721 scratch));
12723 else
12724 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12728 /* Helper function for the string operations below. Test whether VARIABLE
12729 has the bits of VALUE clear (i.e. is suitably aligned); if so, jump to the returned label. */
12730 static rtx
12731 ix86_expand_aligntest (rtx variable, int value)
12733 rtx label = gen_label_rtx ();
12734 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12735 if (GET_MODE (variable) == DImode)
12736 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12737 else
12738 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12739 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12740 1, label);
12741 return label;
12744 /* Decrement COUNTREG by VALUE. */
12745 static void
12746 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12748 if (GET_MODE (countreg) == DImode)
12749 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12750 else
12751 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12754 /* Zero extend a possibly SImode EXP to a Pmode register. */
12755 rtx
12756 ix86_zero_extend_to_Pmode (rtx exp)
12758 rtx r;
12759 if (GET_MODE (exp) == VOIDmode)
12760 return force_reg (Pmode, exp);
12761 if (GET_MODE (exp) == Pmode)
12762 return copy_to_mode_reg (Pmode, exp);
12763 r = gen_reg_rtx (Pmode);
12764 emit_insn (gen_zero_extendsidi2 (r, exp));
12765 return r;
12768 /* Expand string move (memcpy) operation. Use i386 string operations when
12769 profitable. expand_clrmem contains similar code. */
12770 int
12771 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12773 rtx srcreg, destreg, countreg, srcexp, destexp;
12774 enum machine_mode counter_mode;
12775 HOST_WIDE_INT align = 0;
12776 unsigned HOST_WIDE_INT count = 0;
12778 if (GET_CODE (align_exp) == CONST_INT)
12779 align = INTVAL (align_exp);
12781 /* Can't use any of this if the user has appropriated esi or edi. */
12782 if (global_regs[4] || global_regs[5])
12783 return 0;
12785 /* This simple hack avoids all inlining code and simplifies code below. */
12786 if (!TARGET_ALIGN_STRINGOPS)
12787 align = 64;
12789 if (GET_CODE (count_exp) == CONST_INT)
12791 count = INTVAL (count_exp);
12792 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12793 return 0;
12796 /* Figure out proper mode for counter. For 32bits it is always SImode,
12797 for 64bits use SImode when possible, otherwise DImode.
12798 Set count to number of bytes copied when known at compile time. */
12799 if (!TARGET_64BIT
12800 || GET_MODE (count_exp) == SImode
12801 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12802 counter_mode = SImode;
12803 else
12804 counter_mode = DImode;
12806 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12808 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12809 if (destreg != XEXP (dst, 0))
12810 dst = replace_equiv_address_nv (dst, destreg);
12811 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12812 if (srcreg != XEXP (src, 0))
12813 src = replace_equiv_address_nv (src, srcreg);
12815 /* When optimizing for size emit simple rep ; movsb instruction for
12816 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12817 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12818 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12819 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12820 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12821 known to be zero or not. The rep; movsb sequence causes higher
12822 register pressure though, so take that into account. */
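/* For example, copying 9 bytes unrolled is movsl; movsl; movsb, i.e. 3
   bytes of code, while mov $9, %cl; rep; movsb takes 4 bytes (7 with a
   full mov $9, %ecx), so the unrolled form wins when optimizing for size.  */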
12824 if ((!optimize || optimize_size)
12825 && (count == 0
12826 || ((count & 0x03)
12827 && (!optimize_size
12828 || count > 5 * 4
12829 || (count & 3) + count / 4 > 6))))
12831 emit_insn (gen_cld ());
12832 countreg = ix86_zero_extend_to_Pmode (count_exp);
12833 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12834 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12835 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12836 destexp, srcexp));
12839 /* For constant aligned (or small unaligned) copies use rep movsl
12840 followed by code copying the rest. For PentiumPro ensure 8 byte
12841 alignment to allow rep movsl acceleration. */
12843 else if (count != 0
12844 && (align >= 8
12845 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12846 || optimize_size || count < (unsigned int) 64))
12848 unsigned HOST_WIDE_INT offset = 0;
12849 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12850 rtx srcmem, dstmem;
12852 emit_insn (gen_cld ());
12853 if (count & ~(size - 1))
12855 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12857 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12859 while (offset < (count & ~(size - 1)))
12861 srcmem = adjust_automodify_address_nv (src, movs_mode,
12862 srcreg, offset);
12863 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12864 destreg, offset);
12865 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12866 offset += size;
12869 else
12871 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12872 & (TARGET_64BIT ? -1 : 0x3fffffff));
12873 countreg = copy_to_mode_reg (counter_mode, countreg);
12874 countreg = ix86_zero_extend_to_Pmode (countreg);
12876 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12877 GEN_INT (size == 4 ? 2 : 3));
12878 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12879 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12881 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12882 countreg, destexp, srcexp));
12883 offset = count & ~(size - 1);
12886 if (size == 8 && (count & 0x04))
12888 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12889 offset);
12890 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12891 offset);
12892 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12893 offset += 4;
12895 if (count & 0x02)
12897 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12898 offset);
12899 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12900 offset);
12901 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12902 offset += 2;
12904 if (count & 0x01)
12906 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12907 offset);
12908 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12909 offset);
12910 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12913 /* The generic code based on the glibc implementation:
12914 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12915 allowing accelerated copying there)
12916 - copy the data using rep movsl
12917 - copy the rest. */
12918 else
12920 rtx countreg2;
12921 rtx label = NULL;
12922 rtx srcmem, dstmem;
12923 int desired_alignment = (TARGET_PENTIUMPRO
12924 && (count == 0 || count >= (unsigned int) 260)
12925 ? 8 : UNITS_PER_WORD);
12926 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12927 dst = change_address (dst, BLKmode, destreg);
12928 src = change_address (src, BLKmode, srcreg);
12930 /* In case we don't know anything about the alignment, default to
12931 the library version, since it is usually equally fast and results in
12932 shorter code.
12934 Also emit a call when we know that the count is large and the call
12935 overhead will not be important. */
12936 if (!TARGET_INLINE_ALL_STRINGOPS
12937 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12938 return 0;
12940 if (TARGET_SINGLE_STRINGOP)
12941 emit_insn (gen_cld ());
12943 countreg2 = gen_reg_rtx (Pmode);
12944 countreg = copy_to_mode_reg (counter_mode, count_exp);
12946 /* We don't use loops to align destination and to copy parts smaller
12947 than 4 bytes, because gcc is able to optimize such code better (in
12948 the case the destination or the count really is aligned, gcc is often
12949 able to predict the branches) and also it is friendlier to the
12950 hardware branch prediction.
12952 Using loops is beneficial for generic case, because we can
12953 handle small counts using the loops. Many CPUs (such as Athlon)
12954 have large REP prefix setup costs.
12956 This is quite costly. Maybe we can revisit this decision later or
12957 add some customizability to this code. */
12959 if (count == 0 && align < desired_alignment)
12961 label = gen_label_rtx ();
12962 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12963 LEU, 0, counter_mode, 1, label);
12965 if (align <= 1)
12967 rtx label = ix86_expand_aligntest (destreg, 1);
12968 srcmem = change_address (src, QImode, srcreg);
12969 dstmem = change_address (dst, QImode, destreg);
12970 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12971 ix86_adjust_counter (countreg, 1);
12972 emit_label (label);
12973 LABEL_NUSES (label) = 1;
12975 if (align <= 2)
12977 rtx label = ix86_expand_aligntest (destreg, 2);
12978 srcmem = change_address (src, HImode, srcreg);
12979 dstmem = change_address (dst, HImode, destreg);
12980 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12981 ix86_adjust_counter (countreg, 2);
12982 emit_label (label);
12983 LABEL_NUSES (label) = 1;
12985 if (align <= 4 && desired_alignment > 4)
12987 rtx label = ix86_expand_aligntest (destreg, 4);
12988 srcmem = change_address (src, SImode, srcreg);
12989 dstmem = change_address (dst, SImode, destreg);
12990 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12991 ix86_adjust_counter (countreg, 4);
12992 emit_label (label);
12993 LABEL_NUSES (label) = 1;
12996 if (label && desired_alignment > 4 && !TARGET_64BIT)
12998 emit_label (label);
12999 LABEL_NUSES (label) = 1;
13000 label = NULL_RTX;
13002 if (!TARGET_SINGLE_STRINGOP)
13003 emit_insn (gen_cld ());
13004 if (TARGET_64BIT)
13006 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13007 GEN_INT (3)));
13008 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13010 else
13012 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13013 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13015 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13016 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13017 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13018 countreg2, destexp, srcexp));
13020 if (label)
13022 emit_label (label);
13023 LABEL_NUSES (label) = 1;
13025 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13027 srcmem = change_address (src, SImode, srcreg);
13028 dstmem = change_address (dst, SImode, destreg);
13029 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13031 if ((align <= 4 || count == 0) && TARGET_64BIT)
13033 rtx label = ix86_expand_aligntest (countreg, 4);
13034 srcmem = change_address (src, SImode, srcreg);
13035 dstmem = change_address (dst, SImode, destreg);
13036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13037 emit_label (label);
13038 LABEL_NUSES (label) = 1;
13040 if (align > 2 && count != 0 && (count & 2))
13042 srcmem = change_address (src, HImode, srcreg);
13043 dstmem = change_address (dst, HImode, destreg);
13044 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13046 if (align <= 2 || count == 0)
13048 rtx label = ix86_expand_aligntest (countreg, 2);
13049 srcmem = change_address (src, HImode, srcreg);
13050 dstmem = change_address (dst, HImode, destreg);
13051 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13052 emit_label (label);
13053 LABEL_NUSES (label) = 1;
13055 if (align > 1 && count != 0 && (count & 1))
13057 srcmem = change_address (src, QImode, srcreg);
13058 dstmem = change_address (dst, QImode, destreg);
13059 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13061 if (align <= 1 || count == 0)
13063 rtx label = ix86_expand_aligntest (countreg, 1);
13064 srcmem = change_address (src, QImode, srcreg);
13065 dstmem = change_address (dst, QImode, destreg);
13066 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13067 emit_label (label);
13068 LABEL_NUSES (label) = 1;
13072 return 1;
13075 /* Expand string clear operation (bzero). Use i386 string operations when
13076 profitable. expand_movmem contains similar code. */
13077 int
13078 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13080 rtx destreg, zeroreg, countreg, destexp;
13081 enum machine_mode counter_mode;
13082 HOST_WIDE_INT align = 0;
13083 unsigned HOST_WIDE_INT count = 0;
13085 if (GET_CODE (align_exp) == CONST_INT)
13086 align = INTVAL (align_exp);
13088 /* Can't use any of this if the user has appropriated edi. */
13089 if (global_regs[5])
13090 return 0;
13092 /* This simple hack avoids all inlining code and simplifies code below. */
13093 if (!TARGET_ALIGN_STRINGOPS)
13094 align = 32;
13096 if (GET_CODE (count_exp) == CONST_INT)
13098 count = INTVAL (count_exp);
13099 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13100 return 0;
13102 /* Figure out proper mode for counter. For 32bits it is always SImode,
13103 for 64bits use SImode when possible, otherwise DImode.
13104 Set count to number of bytes copied when known at compile time. */
13105 if (!TARGET_64BIT
13106 || GET_MODE (count_exp) == SImode
13107 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13108 counter_mode = SImode;
13109 else
13110 counter_mode = DImode;
13112 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13113 if (destreg != XEXP (dst, 0))
13114 dst = replace_equiv_address_nv (dst, destreg);
13117 /* When optimizing for size emit simple rep ; stosb instruction for
13118 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13119 sequence is 7 bytes long, so if optimizing for size and count is
13120 small enough that some stosl, stosw and stosb instructions without
13121 rep are shorter, fall back into the next if. */
13123 if ((!optimize || optimize_size)
13124 && (count == 0
13125 || ((count & 0x03)
13126 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13128 emit_insn (gen_cld ());
13130 countreg = ix86_zero_extend_to_Pmode (count_exp);
13131 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13132 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13133 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13135 else if (count != 0
13136 && (align >= 8
13137 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13138 || optimize_size || count < (unsigned int) 64))
13140 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13141 unsigned HOST_WIDE_INT offset = 0;
13143 emit_insn (gen_cld ());
13145 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13146 if (count & ~(size - 1))
13148 unsigned HOST_WIDE_INT repcount;
13149 unsigned int max_nonrep;
13151 repcount = count >> (size == 4 ? 2 : 3);
13152 if (!TARGET_64BIT)
13153 repcount &= 0x3fffffff;
13155 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13156 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13157 bytes. In both cases the latter seems to be faster for small
13158 values of N. */
13159 max_nonrep = size == 4 ? 7 : 4;
13160 if (!optimize_size)
13161 switch (ix86_tune)
13163 case PROCESSOR_PENTIUM4:
13164 case PROCESSOR_NOCONA:
13165 max_nonrep = 3;
13166 break;
13167 default:
13168 break;
13171 if (repcount <= max_nonrep)
13172 while (repcount-- > 0)
13174 rtx mem = adjust_automodify_address_nv (dst,
13175 GET_MODE (zeroreg),
13176 destreg, offset);
13177 emit_insn (gen_strset (destreg, mem, zeroreg));
13178 offset += size;
13180 else
13182 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13183 countreg = ix86_zero_extend_to_Pmode (countreg);
13184 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13185 GEN_INT (size == 4 ? 2 : 3));
13186 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13187 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13188 destexp));
13189 offset = count & ~(size - 1);
13192 if (size == 8 && (count & 0x04))
13194 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13195 offset);
13196 emit_insn (gen_strset (destreg, mem,
13197 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13198 offset += 4;
13200 if (count & 0x02)
13202 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13203 offset);
13204 emit_insn (gen_strset (destreg, mem,
13205 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13206 offset += 2;
13208 if (count & 0x01)
13210 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13211 offset);
13212 emit_insn (gen_strset (destreg, mem,
13213 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13216 else
13218 rtx countreg2;
13219 rtx label = NULL;
13220 /* Compute desired alignment of the string operation. */
13221 int desired_alignment = (TARGET_PENTIUMPRO
13222 && (count == 0 || count >= (unsigned int) 260)
13223 ? 8 : UNITS_PER_WORD);
13225 /* In case we don't know anything about the alignment, default to
13226 the library version, since it is usually equally fast and results in
13227 shorter code.
13229 Also emit a call when we know that the count is large and the call
13230 overhead will not be important. */
13231 if (!TARGET_INLINE_ALL_STRINGOPS
13232 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13233 return 0;
13235 if (TARGET_SINGLE_STRINGOP)
13236 emit_insn (gen_cld ());
13238 countreg2 = gen_reg_rtx (Pmode);
13239 countreg = copy_to_mode_reg (counter_mode, count_exp);
13240 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13241 /* Get rid of MEM_OFFSET, it won't be accurate. */
13242 dst = change_address (dst, BLKmode, destreg);
13244 if (count == 0 && align < desired_alignment)
13246 label = gen_label_rtx ();
13247 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13248 LEU, 0, counter_mode, 1, label);
13250 if (align <= 1)
13252 rtx label = ix86_expand_aligntest (destreg, 1);
13253 emit_insn (gen_strset (destreg, dst,
13254 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13255 ix86_adjust_counter (countreg, 1);
13256 emit_label (label);
13257 LABEL_NUSES (label) = 1;
13259 if (align <= 2)
13261 rtx label = ix86_expand_aligntest (destreg, 2);
13262 emit_insn (gen_strset (destreg, dst,
13263 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13264 ix86_adjust_counter (countreg, 2);
13265 emit_label (label);
13266 LABEL_NUSES (label) = 1;
13268 if (align <= 4 && desired_alignment > 4)
13270 rtx label = ix86_expand_aligntest (destreg, 4);
13271 emit_insn (gen_strset (destreg, dst,
13272 (TARGET_64BIT
13273 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13274 : zeroreg)));
13275 ix86_adjust_counter (countreg, 4);
13276 emit_label (label);
13277 LABEL_NUSES (label) = 1;
13280 if (label && desired_alignment > 4 && !TARGET_64BIT)
13282 emit_label (label);
13283 LABEL_NUSES (label) = 1;
13284 label = NULL_RTX;
13287 if (!TARGET_SINGLE_STRINGOP)
13288 emit_insn (gen_cld ());
13289 if (TARGET_64BIT)
13291 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13292 GEN_INT (3)));
13293 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13295 else
13297 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13298 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13300 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13301 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13303 if (label)
13305 emit_label (label);
13306 LABEL_NUSES (label) = 1;
13309 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13310 emit_insn (gen_strset (destreg, dst,
13311 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13312 if (TARGET_64BIT && (align <= 4 || count == 0))
13314 rtx label = ix86_expand_aligntest (countreg, 4);
13315 emit_insn (gen_strset (destreg, dst,
13316 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13317 emit_label (label);
13318 LABEL_NUSES (label) = 1;
13320 if (align > 2 && count != 0 && (count & 2))
13321 emit_insn (gen_strset (destreg, dst,
13322 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13323 if (align <= 2 || count == 0)
13325 rtx label = ix86_expand_aligntest (countreg, 2);
13326 emit_insn (gen_strset (destreg, dst,
13327 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13328 emit_label (label);
13329 LABEL_NUSES (label) = 1;
13331 if (align > 1 && count != 0 && (count & 1))
13332 emit_insn (gen_strset (destreg, dst,
13333 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13334 if (align <= 1 || count == 0)
13336 rtx label = ix86_expand_aligntest (countreg, 1);
13337 emit_insn (gen_strset (destreg, dst,
13338 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13339 emit_label (label);
13340 LABEL_NUSES (label) = 1;
13343 return 1;
13346 /* Expand strlen. */
13347 int
13348 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13350 rtx addr, scratch1, scratch2, scratch3, scratch4;
13352 /* The generic case of the strlen expander is long. Avoid its
13353 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
13355 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13356 && !TARGET_INLINE_ALL_STRINGOPS
13357 && !optimize_size
13358 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13359 return 0;
13361 addr = force_reg (Pmode, XEXP (src, 0));
13362 scratch1 = gen_reg_rtx (Pmode);
13364 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13365 && !optimize_size)
13367 /* It seems that the optimizer does not combine a call such as
13368 foo (strlen (bar), strlen (bar));
13369 when the move and the subtraction are done here.  It does compute the
13370 length just once when these instructions are emitted inside
13371 output_strlen_unroll ().  But since &bar[strlen (bar)] is used often,
13372 and this uses one fewer register for the lifetime of
13373 output_strlen_unroll (), this is still the better choice.  */
13375 emit_move_insn (out, addr);
13377 ix86_expand_strlensi_unroll_1 (out, src, align);
13379 /* strlensi_unroll_1 returns the address of the zero at the end of
13380 the string, like memchr(), so compute the length by subtracting
13381 the start address. */
13382 if (TARGET_64BIT)
13383 emit_insn (gen_subdi3 (out, out, addr));
13384 else
13385 emit_insn (gen_subsi3 (out, out, addr));
13387 else
13389 rtx unspec;
13390 scratch2 = gen_reg_rtx (Pmode);
13391 scratch3 = gen_reg_rtx (Pmode);
13392 scratch4 = force_reg (Pmode, constm1_rtx);
13394 emit_move_insn (scratch3, addr);
13395 eoschar = force_reg (QImode, eoschar);
13397 emit_insn (gen_cld ());
13398 src = replace_equiv_address_nv (src, scratch3);
13400 /* If .md starts supporting :P, this can be done in .md. */
13401 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13402 scratch4), UNSPEC_SCAS);
13403 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13404 if (TARGET_64BIT)
13406 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13407 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13409 else
13411 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13412 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
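     /* Added illustrative note (not in the original sources): the one's
        complement and the add of -1 above recover the length from the
        repnz scasb count.  The count register starts at -1 and is
        decremented once per byte scanned, including the terminating zero,
        so for a string of length LEN it ends up as -(LEN + 2).  Taking the
        one's complement gives LEN + 1, and adding -1 gives LEN.  For
        example, LEN = 3: four bytes are scanned, the count ends at -5,
        ~(-5) = 4, and 4 - 1 = 3.  */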
13415 return 1;
13418 /* Expand the appropriate insns for doing strlen if not just doing
13419 repnz; scasb
13421 out = result, initialized with the start address
13422 align_rtx = alignment of the address.
13423 scratch = scratch register, initialized with the start address when
13424 not aligned, otherwise undefined
13426 This is just the body. It needs the initializations mentioned above and
13427 some address computation at the end.  These things are done in i386.md.  */
13429 static void
13430 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13432 int align;
13433 rtx tmp;
13434 rtx align_2_label = NULL_RTX;
13435 rtx align_3_label = NULL_RTX;
13436 rtx align_4_label = gen_label_rtx ();
13437 rtx end_0_label = gen_label_rtx ();
13438 rtx mem;
13439 rtx tmpreg = gen_reg_rtx (SImode);
13440 rtx scratch = gen_reg_rtx (SImode);
13441 rtx cmp;
13443 align = 0;
13444 if (GET_CODE (align_rtx) == CONST_INT)
13445 align = INTVAL (align_rtx);
13447 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13449 /* Is there a known alignment and is it less than 4? */
13450 if (align < 4)
13452 rtx scratch1 = gen_reg_rtx (Pmode);
13453 emit_move_insn (scratch1, out);
13454 /* Is there a known alignment and is it not 2? */
13455 if (align != 2)
13457 align_3_label = gen_label_rtx (); /* Label for an address that is 3 mod 4.  */
13458 align_2_label = gen_label_rtx (); /* Label for an address that is 2 mod 4.  */
13460 /* Keep just the two low bits of the address.  */
13461 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13462 NULL_RTX, 0, OPTAB_WIDEN);
13464 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13465 Pmode, 1, align_4_label);
13466 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13467 Pmode, 1, align_2_label);
13468 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13469 Pmode, 1, align_3_label);
13471 else
13473 /* Since the alignment is 2, we have to check zero or two leading bytes;
13474 check whether the address is already 4-byte aligned.  */
13476 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13477 NULL_RTX, 0, OPTAB_WIDEN);
13479 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13480 Pmode, 1, align_4_label);
13483 mem = change_address (src, QImode, out);
13485 /* Now compare the bytes. */
13487 /* Compare the first n unaligned bytes one byte at a time.  */
13488 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13489 QImode, 1, end_0_label);
13491 /* Increment the address. */
13492 if (TARGET_64BIT)
13493 emit_insn (gen_adddi3 (out, out, const1_rtx));
13494 else
13495 emit_insn (gen_addsi3 (out, out, const1_rtx));
13497 /* Not needed with an alignment of 2 */
13498 if (align != 2)
13500 emit_label (align_2_label);
13502 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13503 end_0_label);
13505 if (TARGET_64BIT)
13506 emit_insn (gen_adddi3 (out, out, const1_rtx));
13507 else
13508 emit_insn (gen_addsi3 (out, out, const1_rtx));
13510 emit_label (align_3_label);
13513 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13514 end_0_label);
13516 if (TARGET_64BIT)
13517 emit_insn (gen_adddi3 (out, out, const1_rtx));
13518 else
13519 emit_insn (gen_addsi3 (out, out, const1_rtx));
13522 /* Generate the loop that checks 4 bytes at a time.  Aligning this loop
13523 is not a good idea: it only makes the program larger and does not
13524 speed it up.  */
13525 emit_label (align_4_label);
13527 mem = change_address (src, SImode, out);
13528 emit_move_insn (scratch, mem);
13529 if (TARGET_64BIT)
13530 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13531 else
13532 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13534 /* This formula yields a nonzero result iff one of the bytes is zero.
13535 This saves three branches inside the loop and many cycles.  */
13537 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13538 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13539 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13540 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13541 gen_int_mode (0x80808080, SImode)));
13542 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13543 align_4_label);
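  /* Added illustrative note: in plain C the test above is

        ((x - 0x01010101) & ~x & 0x80808080) != 0

     which is nonzero exactly when some byte of the 32-bit word x is zero
     (adding -0x01010101 above is the same subtraction).  For example,
     x = 0x40302000 gives 0x3f2f1eff & 0xbfcfdfff & 0x80808080 = 0x00000080,
     flagging the zero in the low byte.  */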
13545 if (TARGET_CMOVE)
13547 rtx reg = gen_reg_rtx (SImode);
13548 rtx reg2 = gen_reg_rtx (Pmode);
13549 emit_move_insn (reg, tmpreg);
13550 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13552 /* If zero is not in the first two bytes, move two bytes forward. */
13553 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13554 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13555 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13556 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13557 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13558 reg,
13559 tmpreg)));
13560 /* Emit lea manually to avoid clobbering of flags. */
13561 emit_insn (gen_rtx_SET (SImode, reg2,
13562 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13564 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13565 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13566 emit_insn (gen_rtx_SET (VOIDmode, out,
13567 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13568 reg2,
13569 out)));
13572 else
13574 rtx end_2_label = gen_label_rtx ();
13575 /* Is zero in the first two bytes? */
13577 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13578 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13579 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13580 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13581 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13582 pc_rtx);
13583 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13584 JUMP_LABEL (tmp) = end_2_label;
13586 /* Not in the first two. Move two bytes forward. */
13587 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13588 if (TARGET_64BIT)
13589 emit_insn (gen_adddi3 (out, out, const2_rtx));
13590 else
13591 emit_insn (gen_addsi3 (out, out, const2_rtx));
13593 emit_label (end_2_label);
13597 /* Avoid branch in fixing the byte. */
13598 tmpreg = gen_lowpart (QImode, tmpreg);
13599 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13600 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13601 if (TARGET_64BIT)
13602 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13603 else
13604 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13606 emit_label (end_0_label);
13609 void
13610 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13611 rtx callarg2 ATTRIBUTE_UNUSED,
13612 rtx pop, int sibcall)
13614 rtx use = NULL, call;
13616 if (pop == const0_rtx)
13617 pop = NULL;
13618 gcc_assert (!TARGET_64BIT || !pop);
13620 if (TARGET_MACHO && !TARGET_64BIT)
13622 #if TARGET_MACHO
13623 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13624 fnaddr = machopic_indirect_call_target (fnaddr);
13625 #endif
13627 else
13629 /* Static functions and indirect calls don't need the pic register. */
13630 if (! TARGET_64BIT && flag_pic
13631 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13632 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13633 use_reg (&use, pic_offset_table_rtx);
13636 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13638 rtx al = gen_rtx_REG (QImode, 0);
13639 emit_move_insn (al, callarg2);
13640 use_reg (&use, al);
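      /* Added illustrative note: per the x86-64 SysV ABI, the value moved
         into %al above is an upper bound on the number of vector registers
         used by a variadic call; e.g. for printf ("%f", x) the caller sets
         %al to 1 before the call.  */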
13643 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13645 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13646 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13648 if (sibcall && TARGET_64BIT
13649 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13651 rtx addr;
13652 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13653 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13654 emit_move_insn (fnaddr, addr);
13655 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13658 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13659 if (retval)
13660 call = gen_rtx_SET (VOIDmode, retval, call);
13661 if (pop)
13663 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13664 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13665 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13668 call = emit_call_insn (call);
13669 if (use)
13670 CALL_INSN_FUNCTION_USAGE (call) = use;
13674 /* Clear stack slot assignments remembered from previous functions.
13675 This is called from INIT_EXPANDERS once before RTL is emitted for each
13676 function. */
13678 static struct machine_function *
13679 ix86_init_machine_status (void)
13681 struct machine_function *f;
13683 f = ggc_alloc_cleared (sizeof (struct machine_function));
13684 f->use_fast_prologue_epilogue_nregs = -1;
13685 f->tls_descriptor_call_expanded_p = 0;
13687 return f;
13690 /* Return a MEM corresponding to a stack slot with mode MODE.
13691 Allocate a new slot if necessary.
13693 The RTL for a function can have several slots available: N is
13694 which slot to use. */
13697 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13699 struct stack_local_entry *s;
13701 gcc_assert (n < MAX_386_STACK_LOCALS);
13703 for (s = ix86_stack_locals; s; s = s->next)
13704 if (s->mode == mode && s->n == n)
13705 return copy_rtx (s->rtl);
13707 s = (struct stack_local_entry *)
13708 ggc_alloc (sizeof (struct stack_local_entry));
13709 s->n = n;
13710 s->mode = mode;
13711 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13713 s->next = ix86_stack_locals;
13714 ix86_stack_locals = s;
13715 return s->rtl;
13718 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13720 static GTY(()) rtx ix86_tls_symbol;
13722 ix86_tls_get_addr (void)
13725 if (!ix86_tls_symbol)
13727 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13728 (TARGET_ANY_GNU_TLS
13729 && !TARGET_64BIT)
13730 ? "___tls_get_addr"
13731 : "__tls_get_addr");
13734 return ix86_tls_symbol;
13737 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13739 static GTY(()) rtx ix86_tls_module_base_symbol;
13741 ix86_tls_module_base (void)
13744 if (!ix86_tls_module_base_symbol)
13746 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13747 "_TLS_MODULE_BASE_");
13748 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13749 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13752 return ix86_tls_module_base_symbol;
13755 /* Calculate the length of the memory address in the instruction
13756 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13759 memory_address_length (rtx addr)
13761 struct ix86_address parts;
13762 rtx base, index, disp;
13763 int len;
13764 int ok;
13766 if (GET_CODE (addr) == PRE_DEC
13767 || GET_CODE (addr) == POST_INC
13768 || GET_CODE (addr) == PRE_MODIFY
13769 || GET_CODE (addr) == POST_MODIFY)
13770 return 0;
13772 ok = ix86_decompose_address (addr, &parts);
13773 gcc_assert (ok);
13775 if (parts.base && GET_CODE (parts.base) == SUBREG)
13776 parts.base = SUBREG_REG (parts.base);
13777 if (parts.index && GET_CODE (parts.index) == SUBREG)
13778 parts.index = SUBREG_REG (parts.index);
13780 base = parts.base;
13781 index = parts.index;
13782 disp = parts.disp;
13783 len = 0;
13785 /* Rule of thumb:
13786 - esp as the base always wants an index,
13787 - ebp as the base always wants a displacement. */
13789 /* Register Indirect. */
13790 if (base && !index && !disp)
13792 /* esp (for its index) and ebp (for its displacement) need
13793 the two-byte modrm form. */
13794 if (addr == stack_pointer_rtx
13795 || addr == arg_pointer_rtx
13796 || addr == frame_pointer_rtx
13797 || addr == hard_frame_pointer_rtx)
13798 len = 1;
13801 /* Direct Addressing. */
13802 else if (disp && !base && !index)
13803 len = 4;
13805 else
13807 /* Find the length of the displacement constant. */
13808 if (disp)
13810 if (base && satisfies_constraint_K (disp))
13811 len = 1;
13812 else
13813 len = 4;
13815 /* ebp always wants a displacement. */
13816 else if (base == hard_frame_pointer_rtx)
13817 len = 1;
13819 /* An index requires the two-byte modrm form.... */
13820 if (index
13821 /* ...like esp, which always wants an index. */
13822 || base == stack_pointer_rtx
13823 || base == arg_pointer_rtx
13824 || base == frame_pointer_rtx)
13825 len += 1;
13828 return len;
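/* Added illustrative examples (assuming 32-bit operands) of the counts
   computed above, excluding the modrm/opcode/prefix bytes:

     movl (%eax), %ecx      8b 08               -> 0
     movl (%esp), %ecx      8b 0c 24            -> 1  (SIB byte)
     movl (%ebp), %ecx      8b 4d 00            -> 1  (disp8)
     movl 0x1234, %ecx      8b 0d 34 12 00 00   -> 4  (disp32)
     movl 16(%eax), %ecx    8b 48 10            -> 1  (disp8)  */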
13831 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13832 is set, expect that the insn has an 8-bit immediate alternative.  */
13834 ix86_attr_length_immediate_default (rtx insn, int shortform)
13836 int len = 0;
13837 int i;
13838 extract_insn_cached (insn);
13839 for (i = recog_data.n_operands - 1; i >= 0; --i)
13840 if (CONSTANT_P (recog_data.operand[i]))
13842 gcc_assert (!len);
13843 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13844 len = 1;
13845 else
13847 switch (get_attr_mode (insn))
13849 case MODE_QI:
13850 len+=1;
13851 break;
13852 case MODE_HI:
13853 len+=2;
13854 break;
13855 case MODE_SI:
13856 len+=4;
13857 break;
13858 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13859 case MODE_DI:
13860 len+=4;
13861 break;
13862 default:
13863 fatal_insn ("unknown insn mode", insn);
13867 return len;
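/* Added illustrative example: "addl $3, %eax" can use the sign-extended
   8-bit immediate form (83 c0 03), so the attribute value is 1, while
   "addl $300, %eax" needs a full 32-bit immediate (05 2c 01 00 00), so
   the value is 4.  */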
13869 /* Compute default value for "length_address" attribute. */
13871 ix86_attr_length_address_default (rtx insn)
13873 int i;
13875 if (get_attr_type (insn) == TYPE_LEA)
13877 rtx set = PATTERN (insn);
13879 if (GET_CODE (set) == PARALLEL)
13880 set = XVECEXP (set, 0, 0);
13882 gcc_assert (GET_CODE (set) == SET);
13884 return memory_address_length (SET_SRC (set));
13887 extract_insn_cached (insn);
13888 for (i = recog_data.n_operands - 1; i >= 0; --i)
13889 if (GET_CODE (recog_data.operand[i]) == MEM)
13891 return memory_address_length (XEXP (recog_data.operand[i], 0));
13892 break;
13894 return 0;
13897 /* Return the maximum number of instructions a cpu can issue. */
13899 static int
13900 ix86_issue_rate (void)
13902 switch (ix86_tune)
13904 case PROCESSOR_PENTIUM:
13905 case PROCESSOR_K6:
13906 return 2;
13908 case PROCESSOR_PENTIUMPRO:
13909 case PROCESSOR_PENTIUM4:
13910 case PROCESSOR_ATHLON:
13911 case PROCESSOR_K8:
13912 case PROCESSOR_NOCONA:
13913 case PROCESSOR_GENERIC32:
13914 case PROCESSOR_GENERIC64:
13915 return 3;
13917 case PROCESSOR_CORE2:
13918 return 4;
13920 default:
13921 return 1;
13925 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13926 set by DEP_INSN and nothing else that DEP_INSN sets.  */
13928 static int
13929 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13931 rtx set, set2;
13933 /* Simplify the test for uninteresting insns. */
13934 if (insn_type != TYPE_SETCC
13935 && insn_type != TYPE_ICMOV
13936 && insn_type != TYPE_FCMOV
13937 && insn_type != TYPE_IBR)
13938 return 0;
13940 if ((set = single_set (dep_insn)) != 0)
13942 set = SET_DEST (set);
13943 set2 = NULL_RTX;
13945 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13946 && XVECLEN (PATTERN (dep_insn), 0) == 2
13947 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13948 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13950 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13951 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13953 else
13954 return 0;
13956 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13957 return 0;
13959 /* This test is true if the dependent insn reads the flags but
13960 not any other potentially set register. */
13961 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13962 return 0;
13964 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13965 return 0;
13967 return 1;
13970 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13971 address with operands set by DEP_INSN. */
13973 static int
13974 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13976 rtx addr;
13978 if (insn_type == TYPE_LEA
13979 && TARGET_PENTIUM)
13981 addr = PATTERN (insn);
13983 if (GET_CODE (addr) == PARALLEL)
13984 addr = XVECEXP (addr, 0, 0);
13986 gcc_assert (GET_CODE (addr) == SET);
13988 addr = SET_SRC (addr);
13990 else
13992 int i;
13993 extract_insn_cached (insn);
13994 for (i = recog_data.n_operands - 1; i >= 0; --i)
13995 if (GET_CODE (recog_data.operand[i]) == MEM)
13997 addr = XEXP (recog_data.operand[i], 0);
13998 goto found;
14000 return 0;
14001 found:;
14004 return modified_in_p (addr, dep_insn);
14007 static int
14008 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14010 enum attr_type insn_type, dep_insn_type;
14011 enum attr_memory memory;
14012 rtx set, set2;
14013 int dep_insn_code_number;
14015 /* Anti and output dependencies have zero cost on all CPUs. */
14016 if (REG_NOTE_KIND (link) != 0)
14017 return 0;
14019 dep_insn_code_number = recog_memoized (dep_insn);
14021 /* If we can't recognize the insns, we can't really do anything. */
14022 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14023 return cost;
14025 insn_type = get_attr_type (insn);
14026 dep_insn_type = get_attr_type (dep_insn);
14028 switch (ix86_tune)
14030 case PROCESSOR_PENTIUM:
14031 /* Address Generation Interlock adds a cycle of latency. */
14032 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14033 cost += 1;
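      /* Added example: on the Pentium, "addl $4, %esi" immediately
         followed by "movl (%esi), %eax" stalls for one cycle because the
         address generation needs the freshly computed %esi.  */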
14035 /* ??? Compares pair with jump/setcc. */
14036 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14037 cost = 0;
14039 /* Floating point stores require value to be ready one cycle earlier. */
14040 if (insn_type == TYPE_FMOV
14041 && get_attr_memory (insn) == MEMORY_STORE
14042 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14043 cost += 1;
14044 break;
14046 case PROCESSOR_PENTIUMPRO:
14047 memory = get_attr_memory (insn);
14049 /* INT->FP conversion is expensive. */
14050 if (get_attr_fp_int_src (dep_insn))
14051 cost += 5;
14053 /* There is one cycle extra latency between an FP op and a store. */
14054 if (insn_type == TYPE_FMOV
14055 && (set = single_set (dep_insn)) != NULL_RTX
14056 && (set2 = single_set (insn)) != NULL_RTX
14057 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14058 && GET_CODE (SET_DEST (set2)) == MEM)
14059 cost += 1;
14061 /* Account for the ability of the reorder buffer to hide the latency of a
14062 load by executing it in parallel with the previous instruction when the
14063 previous instruction is not needed to compute the address.  */
14064 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14065 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14067 /* Claim that moves take one cycle, as the core can issue one load
14068 at a time and the next load can start a cycle later.  */
14069 if (dep_insn_type == TYPE_IMOV
14070 || dep_insn_type == TYPE_FMOV)
14071 cost = 1;
14072 else if (cost > 1)
14073 cost--;
14075 break;
14077 case PROCESSOR_K6:
14078 memory = get_attr_memory (insn);
14080 /* The esp dependency is resolved before the instruction is really
14081 finished. */
14082 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14083 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14084 return 1;
14086 /* INT->FP conversion is expensive. */
14087 if (get_attr_fp_int_src (dep_insn))
14088 cost += 5;
14090 /* Account for the ability of the reorder buffer to hide the latency of a
14091 load by executing it in parallel with the previous instruction when the
14092 previous instruction is not needed to compute the address.  */
14093 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14094 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14096 /* Claim that moves take one cycle, as the core can issue one load
14097 at a time and the next load can start a cycle later.  */
14098 if (dep_insn_type == TYPE_IMOV
14099 || dep_insn_type == TYPE_FMOV)
14100 cost = 1;
14101 else if (cost > 2)
14102 cost -= 2;
14103 else
14104 cost = 1;
14106 break;
14108 case PROCESSOR_ATHLON:
14109 case PROCESSOR_K8:
14110 case PROCESSOR_GENERIC32:
14111 case PROCESSOR_GENERIC64:
14112 memory = get_attr_memory (insn);
14114 /* Account for the ability of the reorder buffer to hide the latency of a
14115 load by executing it in parallel with the previous instruction when the
14116 previous instruction is not needed to compute the address.  */
14117 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14118 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14120 enum attr_unit unit = get_attr_unit (insn);
14121 int loadcost = 3;
14123 /* Because of the difference in the length of the integer and
14124 floating-point unit pipeline preparation stages, memory operands
14125 for floating point are cheaper.
14127 ??? For Athlon the difference is most probably 2.  */
14128 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14129 loadcost = 3;
14130 else
14131 loadcost = TARGET_ATHLON ? 2 : 0;
14133 if (cost >= loadcost)
14134 cost -= loadcost;
14135 else
14136 cost = 0;
14139 default:
14140 break;
14143 return cost;
14146 /* How many alternative schedules to try.  This should be as wide as the
14147 scheduling freedom in the DFA, but no wider.  Making this value too
14148 large results in extra work for the scheduler.  */
14150 static int
14151 ia32_multipass_dfa_lookahead (void)
14153 if (ix86_tune == PROCESSOR_PENTIUM)
14154 return 2;
14156 if (ix86_tune == PROCESSOR_PENTIUMPRO
14157 || ix86_tune == PROCESSOR_K6)
14158 return 1;
14160 else
14161 return 0;
14165 /* Compute the alignment given to a constant that is being placed in memory.
14166 EXP is the constant and ALIGN is the alignment that the object would
14167 ordinarily have.
14168 The value of this function is used instead of that alignment to align
14169 the object. */
14172 ix86_constant_alignment (tree exp, int align)
14174 if (TREE_CODE (exp) == REAL_CST)
14176 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14177 return 64;
14178 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14179 return 128;
14181 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14182 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14183 return BITS_PER_WORD;
14185 return align;
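/* Added illustrative example: a DFmode constant such as a "static const
   double" literal is bumped to 64-bit alignment here, even though the
   ia32 ABI only guarantees 32-bit alignment for double, so loads of it
   stay naturally aligned.  */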
14188 /* Compute the alignment for a static variable.
14189 TYPE is the data type, and ALIGN is the alignment that
14190 the object would ordinarily have. The value of this function is used
14191 instead of that alignment to align the object. */
14194 ix86_data_alignment (tree type, int align)
14196 int max_align = optimize_size ? BITS_PER_WORD : 256;
14198 if (AGGREGATE_TYPE_P (type)
14199 && TYPE_SIZE (type)
14200 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14201 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14202 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14203 && align < max_align)
14204 align = max_align;
14206 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
14207 to a 16-byte boundary.  */
14208 if (TARGET_64BIT)
14210 if (AGGREGATE_TYPE_P (type)
14211 && TYPE_SIZE (type)
14212 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14213 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14214 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14215 return 128;
14218 if (TREE_CODE (type) == ARRAY_TYPE)
14220 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14221 return 64;
14222 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14223 return 128;
14225 else if (TREE_CODE (type) == COMPLEX_TYPE)
14228 if (TYPE_MODE (type) == DCmode && align < 64)
14229 return 64;
14230 if (TYPE_MODE (type) == XCmode && align < 128)
14231 return 128;
14233 else if ((TREE_CODE (type) == RECORD_TYPE
14234 || TREE_CODE (type) == UNION_TYPE
14235 || TREE_CODE (type) == QUAL_UNION_TYPE)
14236 && TYPE_FIELDS (type))
14238 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14239 return 64;
14240 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14241 return 128;
14243 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14244 || TREE_CODE (type) == INTEGER_TYPE)
14246 if (TYPE_MODE (type) == DFmode && align < 64)
14247 return 64;
14248 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14249 return 128;
14252 return align;
14255 /* Compute the alignment for a local variable.
14256 TYPE is the data type, and ALIGN is the alignment that
14257 the object would ordinarily have. The value of this macro is used
14258 instead of that alignment to align the object. */
14261 ix86_local_alignment (tree type, int align)
14263 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
14264 to a 16-byte boundary.  */
14265 if (TARGET_64BIT)
14267 if (AGGREGATE_TYPE_P (type)
14268 && TYPE_SIZE (type)
14269 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14270 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14271 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14272 return 128;
14274 if (TREE_CODE (type) == ARRAY_TYPE)
14276 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14277 return 64;
14278 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14279 return 128;
14281 else if (TREE_CODE (type) == COMPLEX_TYPE)
14283 if (TYPE_MODE (type) == DCmode && align < 64)
14284 return 64;
14285 if (TYPE_MODE (type) == XCmode && align < 128)
14286 return 128;
14288 else if ((TREE_CODE (type) == RECORD_TYPE
14289 || TREE_CODE (type) == UNION_TYPE
14290 || TREE_CODE (type) == QUAL_UNION_TYPE)
14291 && TYPE_FIELDS (type))
14293 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14294 return 64;
14295 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14296 return 128;
14298 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14299 || TREE_CODE (type) == INTEGER_TYPE)
14302 if (TYPE_MODE (type) == DFmode && align < 64)
14303 return 64;
14304 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14305 return 128;
14307 return align;
14310 /* Emit RTL insns to initialize the variable parts of a trampoline.
14311 FNADDR is an RTX for the address of the function's pure code.
14312 CXT is an RTX for the static chain value for the function. */
14313 void
14314 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14316 if (!TARGET_64BIT)
14318 /* Compute offset from the end of the jmp to the target function. */
14319 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14320 plus_constant (tramp, 10),
14321 NULL_RTX, 1, OPTAB_DIRECT);
14322 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14323 gen_int_mode (0xb9, QImode));
14324 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14325 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14326 gen_int_mode (0xe9, QImode));
14327 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14329 else
14331 int offset = 0;
14332 /* Try to load the address with the shorter movl instead of movabs.
14333 We may want to support movq for kernel mode, but the kernel does not
14334 use trampolines at the moment.  */
14335 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14337 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14338 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14339 gen_int_mode (0xbb41, HImode));
14340 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14341 gen_lowpart (SImode, fnaddr));
14342 offset += 6;
14344 else
14346 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14347 gen_int_mode (0xbb49, HImode));
14348 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14349 fnaddr);
14350 offset += 10;
14352 /* Load static chain using movabs to r10. */
14353 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14354 gen_int_mode (0xba49, HImode));
14355 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14356 cxt);
14357 offset += 10;
14358 /* Jump to r11.  */
14359 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14360 gen_int_mode (0xff49, HImode));
14361 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14362 gen_int_mode (0xe3, QImode));
14363 offset += 3;
14364 gcc_assert (offset <= TRAMPOLINE_SIZE);
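  /* Added illustrative summary of the bytes emitted above.
     32-bit trampoline (10 bytes):
        b9 <cxt32>                        movl  $CXT, %ecx
        e9 <rel32>                        jmp   FNADDR   (rel32 = FNADDR - (TRAMP + 10))
     64-bit trampoline (19 or 23 bytes):
        41 bb <imm32>  or  49 bb <imm64>  mov{l,absq}  $FNADDR, %r11
        49 ba <imm64>                     movabsq      $CXT, %r10
        49 ff e3                          jmpq         *%r11  */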
14367 #ifdef ENABLE_EXECUTE_STACK
14368 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14369 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14370 #endif
14373 /* Codes for all the SSE/MMX builtins. */
14374 enum ix86_builtins
14376 IX86_BUILTIN_ADDPS,
14377 IX86_BUILTIN_ADDSS,
14378 IX86_BUILTIN_DIVPS,
14379 IX86_BUILTIN_DIVSS,
14380 IX86_BUILTIN_MULPS,
14381 IX86_BUILTIN_MULSS,
14382 IX86_BUILTIN_SUBPS,
14383 IX86_BUILTIN_SUBSS,
14385 IX86_BUILTIN_CMPEQPS,
14386 IX86_BUILTIN_CMPLTPS,
14387 IX86_BUILTIN_CMPLEPS,
14388 IX86_BUILTIN_CMPGTPS,
14389 IX86_BUILTIN_CMPGEPS,
14390 IX86_BUILTIN_CMPNEQPS,
14391 IX86_BUILTIN_CMPNLTPS,
14392 IX86_BUILTIN_CMPNLEPS,
14393 IX86_BUILTIN_CMPNGTPS,
14394 IX86_BUILTIN_CMPNGEPS,
14395 IX86_BUILTIN_CMPORDPS,
14396 IX86_BUILTIN_CMPUNORDPS,
14397 IX86_BUILTIN_CMPEQSS,
14398 IX86_BUILTIN_CMPLTSS,
14399 IX86_BUILTIN_CMPLESS,
14400 IX86_BUILTIN_CMPNEQSS,
14401 IX86_BUILTIN_CMPNLTSS,
14402 IX86_BUILTIN_CMPNLESS,
14403 IX86_BUILTIN_CMPNGTSS,
14404 IX86_BUILTIN_CMPNGESS,
14405 IX86_BUILTIN_CMPORDSS,
14406 IX86_BUILTIN_CMPUNORDSS,
14408 IX86_BUILTIN_COMIEQSS,
14409 IX86_BUILTIN_COMILTSS,
14410 IX86_BUILTIN_COMILESS,
14411 IX86_BUILTIN_COMIGTSS,
14412 IX86_BUILTIN_COMIGESS,
14413 IX86_BUILTIN_COMINEQSS,
14414 IX86_BUILTIN_UCOMIEQSS,
14415 IX86_BUILTIN_UCOMILTSS,
14416 IX86_BUILTIN_UCOMILESS,
14417 IX86_BUILTIN_UCOMIGTSS,
14418 IX86_BUILTIN_UCOMIGESS,
14419 IX86_BUILTIN_UCOMINEQSS,
14421 IX86_BUILTIN_CVTPI2PS,
14422 IX86_BUILTIN_CVTPS2PI,
14423 IX86_BUILTIN_CVTSI2SS,
14424 IX86_BUILTIN_CVTSI642SS,
14425 IX86_BUILTIN_CVTSS2SI,
14426 IX86_BUILTIN_CVTSS2SI64,
14427 IX86_BUILTIN_CVTTPS2PI,
14428 IX86_BUILTIN_CVTTSS2SI,
14429 IX86_BUILTIN_CVTTSS2SI64,
14431 IX86_BUILTIN_MAXPS,
14432 IX86_BUILTIN_MAXSS,
14433 IX86_BUILTIN_MINPS,
14434 IX86_BUILTIN_MINSS,
14436 IX86_BUILTIN_LOADUPS,
14437 IX86_BUILTIN_STOREUPS,
14438 IX86_BUILTIN_MOVSS,
14440 IX86_BUILTIN_MOVHLPS,
14441 IX86_BUILTIN_MOVLHPS,
14442 IX86_BUILTIN_LOADHPS,
14443 IX86_BUILTIN_LOADLPS,
14444 IX86_BUILTIN_STOREHPS,
14445 IX86_BUILTIN_STORELPS,
14447 IX86_BUILTIN_MASKMOVQ,
14448 IX86_BUILTIN_MOVMSKPS,
14449 IX86_BUILTIN_PMOVMSKB,
14451 IX86_BUILTIN_MOVNTPS,
14452 IX86_BUILTIN_MOVNTQ,
14454 IX86_BUILTIN_LOADDQU,
14455 IX86_BUILTIN_STOREDQU,
14457 IX86_BUILTIN_PACKSSWB,
14458 IX86_BUILTIN_PACKSSDW,
14459 IX86_BUILTIN_PACKUSWB,
14461 IX86_BUILTIN_PADDB,
14462 IX86_BUILTIN_PADDW,
14463 IX86_BUILTIN_PADDD,
14464 IX86_BUILTIN_PADDQ,
14465 IX86_BUILTIN_PADDSB,
14466 IX86_BUILTIN_PADDSW,
14467 IX86_BUILTIN_PADDUSB,
14468 IX86_BUILTIN_PADDUSW,
14469 IX86_BUILTIN_PSUBB,
14470 IX86_BUILTIN_PSUBW,
14471 IX86_BUILTIN_PSUBD,
14472 IX86_BUILTIN_PSUBQ,
14473 IX86_BUILTIN_PSUBSB,
14474 IX86_BUILTIN_PSUBSW,
14475 IX86_BUILTIN_PSUBUSB,
14476 IX86_BUILTIN_PSUBUSW,
14478 IX86_BUILTIN_PAND,
14479 IX86_BUILTIN_PANDN,
14480 IX86_BUILTIN_POR,
14481 IX86_BUILTIN_PXOR,
14483 IX86_BUILTIN_PAVGB,
14484 IX86_BUILTIN_PAVGW,
14486 IX86_BUILTIN_PCMPEQB,
14487 IX86_BUILTIN_PCMPEQW,
14488 IX86_BUILTIN_PCMPEQD,
14489 IX86_BUILTIN_PCMPGTB,
14490 IX86_BUILTIN_PCMPGTW,
14491 IX86_BUILTIN_PCMPGTD,
14493 IX86_BUILTIN_PMADDWD,
14495 IX86_BUILTIN_PMAXSW,
14496 IX86_BUILTIN_PMAXUB,
14497 IX86_BUILTIN_PMINSW,
14498 IX86_BUILTIN_PMINUB,
14500 IX86_BUILTIN_PMULHUW,
14501 IX86_BUILTIN_PMULHW,
14502 IX86_BUILTIN_PMULLW,
14504 IX86_BUILTIN_PSADBW,
14505 IX86_BUILTIN_PSHUFW,
14507 IX86_BUILTIN_PSLLW,
14508 IX86_BUILTIN_PSLLD,
14509 IX86_BUILTIN_PSLLQ,
14510 IX86_BUILTIN_PSRAW,
14511 IX86_BUILTIN_PSRAD,
14512 IX86_BUILTIN_PSRLW,
14513 IX86_BUILTIN_PSRLD,
14514 IX86_BUILTIN_PSRLQ,
14515 IX86_BUILTIN_PSLLWI,
14516 IX86_BUILTIN_PSLLDI,
14517 IX86_BUILTIN_PSLLQI,
14518 IX86_BUILTIN_PSRAWI,
14519 IX86_BUILTIN_PSRADI,
14520 IX86_BUILTIN_PSRLWI,
14521 IX86_BUILTIN_PSRLDI,
14522 IX86_BUILTIN_PSRLQI,
14524 IX86_BUILTIN_PUNPCKHBW,
14525 IX86_BUILTIN_PUNPCKHWD,
14526 IX86_BUILTIN_PUNPCKHDQ,
14527 IX86_BUILTIN_PUNPCKLBW,
14528 IX86_BUILTIN_PUNPCKLWD,
14529 IX86_BUILTIN_PUNPCKLDQ,
14531 IX86_BUILTIN_SHUFPS,
14533 IX86_BUILTIN_RCPPS,
14534 IX86_BUILTIN_RCPSS,
14535 IX86_BUILTIN_RSQRTPS,
14536 IX86_BUILTIN_RSQRTSS,
14537 IX86_BUILTIN_SQRTPS,
14538 IX86_BUILTIN_SQRTSS,
14540 IX86_BUILTIN_UNPCKHPS,
14541 IX86_BUILTIN_UNPCKLPS,
14543 IX86_BUILTIN_ANDPS,
14544 IX86_BUILTIN_ANDNPS,
14545 IX86_BUILTIN_ORPS,
14546 IX86_BUILTIN_XORPS,
14548 IX86_BUILTIN_EMMS,
14549 IX86_BUILTIN_LDMXCSR,
14550 IX86_BUILTIN_STMXCSR,
14551 IX86_BUILTIN_SFENCE,
14553 /* 3DNow! Original */
14554 IX86_BUILTIN_FEMMS,
14555 IX86_BUILTIN_PAVGUSB,
14556 IX86_BUILTIN_PF2ID,
14557 IX86_BUILTIN_PFACC,
14558 IX86_BUILTIN_PFADD,
14559 IX86_BUILTIN_PFCMPEQ,
14560 IX86_BUILTIN_PFCMPGE,
14561 IX86_BUILTIN_PFCMPGT,
14562 IX86_BUILTIN_PFMAX,
14563 IX86_BUILTIN_PFMIN,
14564 IX86_BUILTIN_PFMUL,
14565 IX86_BUILTIN_PFRCP,
14566 IX86_BUILTIN_PFRCPIT1,
14567 IX86_BUILTIN_PFRCPIT2,
14568 IX86_BUILTIN_PFRSQIT1,
14569 IX86_BUILTIN_PFRSQRT,
14570 IX86_BUILTIN_PFSUB,
14571 IX86_BUILTIN_PFSUBR,
14572 IX86_BUILTIN_PI2FD,
14573 IX86_BUILTIN_PMULHRW,
14575 /* 3DNow! Athlon Extensions */
14576 IX86_BUILTIN_PF2IW,
14577 IX86_BUILTIN_PFNACC,
14578 IX86_BUILTIN_PFPNACC,
14579 IX86_BUILTIN_PI2FW,
14580 IX86_BUILTIN_PSWAPDSI,
14581 IX86_BUILTIN_PSWAPDSF,
14583 /* SSE2 */
14584 IX86_BUILTIN_ADDPD,
14585 IX86_BUILTIN_ADDSD,
14586 IX86_BUILTIN_DIVPD,
14587 IX86_BUILTIN_DIVSD,
14588 IX86_BUILTIN_MULPD,
14589 IX86_BUILTIN_MULSD,
14590 IX86_BUILTIN_SUBPD,
14591 IX86_BUILTIN_SUBSD,
14593 IX86_BUILTIN_CMPEQPD,
14594 IX86_BUILTIN_CMPLTPD,
14595 IX86_BUILTIN_CMPLEPD,
14596 IX86_BUILTIN_CMPGTPD,
14597 IX86_BUILTIN_CMPGEPD,
14598 IX86_BUILTIN_CMPNEQPD,
14599 IX86_BUILTIN_CMPNLTPD,
14600 IX86_BUILTIN_CMPNLEPD,
14601 IX86_BUILTIN_CMPNGTPD,
14602 IX86_BUILTIN_CMPNGEPD,
14603 IX86_BUILTIN_CMPORDPD,
14604 IX86_BUILTIN_CMPUNORDPD,
14605 IX86_BUILTIN_CMPNEPD,
14606 IX86_BUILTIN_CMPEQSD,
14607 IX86_BUILTIN_CMPLTSD,
14608 IX86_BUILTIN_CMPLESD,
14609 IX86_BUILTIN_CMPNEQSD,
14610 IX86_BUILTIN_CMPNLTSD,
14611 IX86_BUILTIN_CMPNLESD,
14612 IX86_BUILTIN_CMPORDSD,
14613 IX86_BUILTIN_CMPUNORDSD,
14614 IX86_BUILTIN_CMPNESD,
14616 IX86_BUILTIN_COMIEQSD,
14617 IX86_BUILTIN_COMILTSD,
14618 IX86_BUILTIN_COMILESD,
14619 IX86_BUILTIN_COMIGTSD,
14620 IX86_BUILTIN_COMIGESD,
14621 IX86_BUILTIN_COMINEQSD,
14622 IX86_BUILTIN_UCOMIEQSD,
14623 IX86_BUILTIN_UCOMILTSD,
14624 IX86_BUILTIN_UCOMILESD,
14625 IX86_BUILTIN_UCOMIGTSD,
14626 IX86_BUILTIN_UCOMIGESD,
14627 IX86_BUILTIN_UCOMINEQSD,
14629 IX86_BUILTIN_MAXPD,
14630 IX86_BUILTIN_MAXSD,
14631 IX86_BUILTIN_MINPD,
14632 IX86_BUILTIN_MINSD,
14634 IX86_BUILTIN_ANDPD,
14635 IX86_BUILTIN_ANDNPD,
14636 IX86_BUILTIN_ORPD,
14637 IX86_BUILTIN_XORPD,
14639 IX86_BUILTIN_SQRTPD,
14640 IX86_BUILTIN_SQRTSD,
14642 IX86_BUILTIN_UNPCKHPD,
14643 IX86_BUILTIN_UNPCKLPD,
14645 IX86_BUILTIN_SHUFPD,
14647 IX86_BUILTIN_LOADUPD,
14648 IX86_BUILTIN_STOREUPD,
14649 IX86_BUILTIN_MOVSD,
14651 IX86_BUILTIN_LOADHPD,
14652 IX86_BUILTIN_LOADLPD,
14654 IX86_BUILTIN_CVTDQ2PD,
14655 IX86_BUILTIN_CVTDQ2PS,
14657 IX86_BUILTIN_CVTPD2DQ,
14658 IX86_BUILTIN_CVTPD2PI,
14659 IX86_BUILTIN_CVTPD2PS,
14660 IX86_BUILTIN_CVTTPD2DQ,
14661 IX86_BUILTIN_CVTTPD2PI,
14663 IX86_BUILTIN_CVTPI2PD,
14664 IX86_BUILTIN_CVTSI2SD,
14665 IX86_BUILTIN_CVTSI642SD,
14667 IX86_BUILTIN_CVTSD2SI,
14668 IX86_BUILTIN_CVTSD2SI64,
14669 IX86_BUILTIN_CVTSD2SS,
14670 IX86_BUILTIN_CVTSS2SD,
14671 IX86_BUILTIN_CVTTSD2SI,
14672 IX86_BUILTIN_CVTTSD2SI64,
14674 IX86_BUILTIN_CVTPS2DQ,
14675 IX86_BUILTIN_CVTPS2PD,
14676 IX86_BUILTIN_CVTTPS2DQ,
14678 IX86_BUILTIN_MOVNTI,
14679 IX86_BUILTIN_MOVNTPD,
14680 IX86_BUILTIN_MOVNTDQ,
14682 /* SSE2 MMX */
14683 IX86_BUILTIN_MASKMOVDQU,
14684 IX86_BUILTIN_MOVMSKPD,
14685 IX86_BUILTIN_PMOVMSKB128,
14687 IX86_BUILTIN_PACKSSWB128,
14688 IX86_BUILTIN_PACKSSDW128,
14689 IX86_BUILTIN_PACKUSWB128,
14691 IX86_BUILTIN_PADDB128,
14692 IX86_BUILTIN_PADDW128,
14693 IX86_BUILTIN_PADDD128,
14694 IX86_BUILTIN_PADDQ128,
14695 IX86_BUILTIN_PADDSB128,
14696 IX86_BUILTIN_PADDSW128,
14697 IX86_BUILTIN_PADDUSB128,
14698 IX86_BUILTIN_PADDUSW128,
14699 IX86_BUILTIN_PSUBB128,
14700 IX86_BUILTIN_PSUBW128,
14701 IX86_BUILTIN_PSUBD128,
14702 IX86_BUILTIN_PSUBQ128,
14703 IX86_BUILTIN_PSUBSB128,
14704 IX86_BUILTIN_PSUBSW128,
14705 IX86_BUILTIN_PSUBUSB128,
14706 IX86_BUILTIN_PSUBUSW128,
14708 IX86_BUILTIN_PAND128,
14709 IX86_BUILTIN_PANDN128,
14710 IX86_BUILTIN_POR128,
14711 IX86_BUILTIN_PXOR128,
14713 IX86_BUILTIN_PAVGB128,
14714 IX86_BUILTIN_PAVGW128,
14716 IX86_BUILTIN_PCMPEQB128,
14717 IX86_BUILTIN_PCMPEQW128,
14718 IX86_BUILTIN_PCMPEQD128,
14719 IX86_BUILTIN_PCMPGTB128,
14720 IX86_BUILTIN_PCMPGTW128,
14721 IX86_BUILTIN_PCMPGTD128,
14723 IX86_BUILTIN_PMADDWD128,
14725 IX86_BUILTIN_PMAXSW128,
14726 IX86_BUILTIN_PMAXUB128,
14727 IX86_BUILTIN_PMINSW128,
14728 IX86_BUILTIN_PMINUB128,
14730 IX86_BUILTIN_PMULUDQ,
14731 IX86_BUILTIN_PMULUDQ128,
14732 IX86_BUILTIN_PMULHUW128,
14733 IX86_BUILTIN_PMULHW128,
14734 IX86_BUILTIN_PMULLW128,
14736 IX86_BUILTIN_PSADBW128,
14737 IX86_BUILTIN_PSHUFHW,
14738 IX86_BUILTIN_PSHUFLW,
14739 IX86_BUILTIN_PSHUFD,
14741 IX86_BUILTIN_PSLLW128,
14742 IX86_BUILTIN_PSLLD128,
14743 IX86_BUILTIN_PSLLQ128,
14744 IX86_BUILTIN_PSRAW128,
14745 IX86_BUILTIN_PSRAD128,
14746 IX86_BUILTIN_PSRLW128,
14747 IX86_BUILTIN_PSRLD128,
14748 IX86_BUILTIN_PSRLQ128,
14749 IX86_BUILTIN_PSLLDQI128,
14750 IX86_BUILTIN_PSLLWI128,
14751 IX86_BUILTIN_PSLLDI128,
14752 IX86_BUILTIN_PSLLQI128,
14753 IX86_BUILTIN_PSRAWI128,
14754 IX86_BUILTIN_PSRADI128,
14755 IX86_BUILTIN_PSRLDQI128,
14756 IX86_BUILTIN_PSRLWI128,
14757 IX86_BUILTIN_PSRLDI128,
14758 IX86_BUILTIN_PSRLQI128,
14760 IX86_BUILTIN_PUNPCKHBW128,
14761 IX86_BUILTIN_PUNPCKHWD128,
14762 IX86_BUILTIN_PUNPCKHDQ128,
14763 IX86_BUILTIN_PUNPCKHQDQ128,
14764 IX86_BUILTIN_PUNPCKLBW128,
14765 IX86_BUILTIN_PUNPCKLWD128,
14766 IX86_BUILTIN_PUNPCKLDQ128,
14767 IX86_BUILTIN_PUNPCKLQDQ128,
14769 IX86_BUILTIN_CLFLUSH,
14770 IX86_BUILTIN_MFENCE,
14771 IX86_BUILTIN_LFENCE,
14773 /* Prescott New Instructions. */
14774 IX86_BUILTIN_ADDSUBPS,
14775 IX86_BUILTIN_HADDPS,
14776 IX86_BUILTIN_HSUBPS,
14777 IX86_BUILTIN_MOVSHDUP,
14778 IX86_BUILTIN_MOVSLDUP,
14779 IX86_BUILTIN_ADDSUBPD,
14780 IX86_BUILTIN_HADDPD,
14781 IX86_BUILTIN_HSUBPD,
14782 IX86_BUILTIN_LDDQU,
14784 IX86_BUILTIN_MONITOR,
14785 IX86_BUILTIN_MWAIT,
14787 /* SSSE3. */
14788 IX86_BUILTIN_PHADDW,
14789 IX86_BUILTIN_PHADDD,
14790 IX86_BUILTIN_PHADDSW,
14791 IX86_BUILTIN_PHSUBW,
14792 IX86_BUILTIN_PHSUBD,
14793 IX86_BUILTIN_PHSUBSW,
14794 IX86_BUILTIN_PMADDUBSW,
14795 IX86_BUILTIN_PMULHRSW,
14796 IX86_BUILTIN_PSHUFB,
14797 IX86_BUILTIN_PSIGNB,
14798 IX86_BUILTIN_PSIGNW,
14799 IX86_BUILTIN_PSIGND,
14800 IX86_BUILTIN_PALIGNR,
14801 IX86_BUILTIN_PABSB,
14802 IX86_BUILTIN_PABSW,
14803 IX86_BUILTIN_PABSD,
14805 IX86_BUILTIN_PHADDW128,
14806 IX86_BUILTIN_PHADDD128,
14807 IX86_BUILTIN_PHADDSW128,
14808 IX86_BUILTIN_PHSUBW128,
14809 IX86_BUILTIN_PHSUBD128,
14810 IX86_BUILTIN_PHSUBSW128,
14811 IX86_BUILTIN_PMADDUBSW128,
14812 IX86_BUILTIN_PMULHRSW128,
14813 IX86_BUILTIN_PSHUFB128,
14814 IX86_BUILTIN_PSIGNB128,
14815 IX86_BUILTIN_PSIGNW128,
14816 IX86_BUILTIN_PSIGND128,
14817 IX86_BUILTIN_PALIGNR128,
14818 IX86_BUILTIN_PABSB128,
14819 IX86_BUILTIN_PABSW128,
14820 IX86_BUILTIN_PABSD128,
14822 IX86_BUILTIN_VEC_INIT_V2SI,
14823 IX86_BUILTIN_VEC_INIT_V4HI,
14824 IX86_BUILTIN_VEC_INIT_V8QI,
14825 IX86_BUILTIN_VEC_EXT_V2DF,
14826 IX86_BUILTIN_VEC_EXT_V2DI,
14827 IX86_BUILTIN_VEC_EXT_V4SF,
14828 IX86_BUILTIN_VEC_EXT_V4SI,
14829 IX86_BUILTIN_VEC_EXT_V8HI,
14830 IX86_BUILTIN_VEC_EXT_V2SI,
14831 IX86_BUILTIN_VEC_EXT_V4HI,
14832 IX86_BUILTIN_VEC_SET_V8HI,
14833 IX86_BUILTIN_VEC_SET_V4HI,
14835 IX86_BUILTIN_MAX
14838 #define def_builtin(MASK, NAME, TYPE, CODE) \
14839 do { \
14840 if ((MASK) & target_flags \
14841 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14842 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14843 NULL, NULL_TREE); \
14844 } while (0)
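/* Added illustrative use (the type node name below is hypothetical): a
   call such as

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when the SSE bit is present in target_flags,
   and, had MASK_64BIT been included in the mask, only on 64-bit targets.  */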
14846 /* Bits for builtin_description.flag. */
14848 /* Set when we don't support the comparison natively, and should
14849 swap the comparison operands in order to support it.  */
14850 #define BUILTIN_DESC_SWAP_OPERANDS 1
14852 struct builtin_description
14854 const unsigned int mask;
14855 const enum insn_code icode;
14856 const char *const name;
14857 const enum ix86_builtins code;
14858 const enum rtx_code comparison;
14859 const unsigned int flag;
14862 static const struct builtin_description bdesc_comi[] =
14864 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14865 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14866 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14867 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14868 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14869 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14872 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14873 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14874 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14875 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14878 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14879 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14886 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14887 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14890 static const struct builtin_description bdesc_2arg[] =
14892 /* SSE */
14893 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14894 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14895 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14896 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14897 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14898 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14899 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14900 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14902 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14903 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14904 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14905 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14906 BUILTIN_DESC_SWAP_OPERANDS },
14907 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14908 BUILTIN_DESC_SWAP_OPERANDS },
14909 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14910 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14911 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14912 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14913 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14914 BUILTIN_DESC_SWAP_OPERANDS },
14915 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14916 BUILTIN_DESC_SWAP_OPERANDS },
14917 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14918 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14919 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14920 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14921 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14922 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14923 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14924 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14925 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14926 BUILTIN_DESC_SWAP_OPERANDS },
14927 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14928 BUILTIN_DESC_SWAP_OPERANDS },
14929 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14931 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14932 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14933 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14934 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14936 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14937 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14938 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14939 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14941 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14942 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14943 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14944 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14945 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14947 /* MMX */
14948 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14949 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14950 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14951 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14952 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14953 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14954 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14955 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14957 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14958 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14959 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14960 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14961 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14962 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14963 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14964 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14966 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14967 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14968 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14970 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14971 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14972 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14973 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14978 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14979 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14980 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14981 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14982 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14983 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14985 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14986 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14987 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14988 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14990 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14991 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14992 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14993 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14994 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14995 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14997 /* Special. */
14998 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14999 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15000 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15002 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15003 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15004 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15006 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15007 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15008 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15009 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15010 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15011 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15013 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15014 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15015 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15016 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15017 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15018 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15020 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15021 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15022 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15023 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15025 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15026 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15028 /* SSE2 */
15029 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15030 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15031 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15032 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15036 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15038 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15039 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15040 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15041 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15042 BUILTIN_DESC_SWAP_OPERANDS },
15043 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15044 BUILTIN_DESC_SWAP_OPERANDS },
15045 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15046 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15047 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15048 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15049 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15050 BUILTIN_DESC_SWAP_OPERANDS },
15051 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15052 BUILTIN_DESC_SWAP_OPERANDS },
15053 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15054 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15055 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15056 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15057 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15058 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15059 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15060 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15061 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15063 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15064 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15065 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15066 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15068 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15069 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15070 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15071 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15073 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15074 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15075 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15077 /* SSE2 MMX */
15078 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15079 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15080 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15081 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15082 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15083 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15084 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15085 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15087 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15088 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15089 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15090 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15091 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15092 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15093 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15094 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15096 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15097 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15099 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15100 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15101 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15102 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15104 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15105 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15107 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15108 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15109 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15110 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15111 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15112 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15114 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15115 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15116 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15117 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15119 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15120 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15121 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15122 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15123 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15124 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15125 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15126 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15128 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15129 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15130 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15132 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15133 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15135 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15136 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15138 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15139 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15140 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15142 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15143 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15144 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15146 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15147 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15149 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15151 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15152 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15153 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15154 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15156 /* SSE3 MMX */
15157 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15158 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15159 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15160 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15161 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15162 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15164 /* SSSE3 */
15165 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15166 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15167 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15168 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15169 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15170 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15171 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15172 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15173 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15174 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15175 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15176 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15177 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15178 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15179 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15180 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15181 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15182 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15183 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15184 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15185 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15186 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15187 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15188 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15191 static const struct builtin_description bdesc_1arg[] =
15193 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15194 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15196 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15197 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15198 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15200 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15201 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15202 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15203 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15204 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15205 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15207 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15208 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15210 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15212 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15213 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15215 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15216 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15217 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15218 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15219 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15221 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15223 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15224 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15225 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15226 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15228 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15229 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15230 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15232 /* SSE3 */
15233 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15234 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15236 /* SSSE3 */
15237 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15238 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15239 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15240 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15241 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15242 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15245 static void
15246 ix86_init_builtins (void)
15248 if (TARGET_MMX)
15249 ix86_init_mmx_sse_builtins ();
15252 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15253 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15254 builtins. */
15255 static void
15256 ix86_init_mmx_sse_builtins (void)
15258 const struct builtin_description * d;
15259 size_t i;
15261 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15262 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15263 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15264 tree V2DI_type_node
15265 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15266 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15267 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15268 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15269 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15270 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15271 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15273 tree pchar_type_node = build_pointer_type (char_type_node);
15274 tree pcchar_type_node = build_pointer_type (
15275 build_type_variant (char_type_node, 1, 0));
15276 tree pfloat_type_node = build_pointer_type (float_type_node);
15277 tree pcfloat_type_node = build_pointer_type (
15278 build_type_variant (float_type_node, 1, 0));
15279 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15280 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15281 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15283 /* Comparisons. */
15284 tree int_ftype_v4sf_v4sf
15285 = build_function_type_list (integer_type_node,
15286 V4SF_type_node, V4SF_type_node, NULL_TREE);
15287 tree v4si_ftype_v4sf_v4sf
15288 = build_function_type_list (V4SI_type_node,
15289 V4SF_type_node, V4SF_type_node, NULL_TREE);
15290 /* MMX/SSE/integer conversions. */
15291 tree int_ftype_v4sf
15292 = build_function_type_list (integer_type_node,
15293 V4SF_type_node, NULL_TREE);
15294 tree int64_ftype_v4sf
15295 = build_function_type_list (long_long_integer_type_node,
15296 V4SF_type_node, NULL_TREE);
15297 tree int_ftype_v8qi
15298 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15299 tree v4sf_ftype_v4sf_int
15300 = build_function_type_list (V4SF_type_node,
15301 V4SF_type_node, integer_type_node, NULL_TREE);
15302 tree v4sf_ftype_v4sf_int64
15303 = build_function_type_list (V4SF_type_node,
15304 V4SF_type_node, long_long_integer_type_node,
15305 NULL_TREE);
15306 tree v4sf_ftype_v4sf_v2si
15307 = build_function_type_list (V4SF_type_node,
15308 V4SF_type_node, V2SI_type_node, NULL_TREE);
15310 /* Miscellaneous. */
15311 tree v8qi_ftype_v4hi_v4hi
15312 = build_function_type_list (V8QI_type_node,
15313 V4HI_type_node, V4HI_type_node, NULL_TREE);
15314 tree v4hi_ftype_v2si_v2si
15315 = build_function_type_list (V4HI_type_node,
15316 V2SI_type_node, V2SI_type_node, NULL_TREE);
15317 tree v4sf_ftype_v4sf_v4sf_int
15318 = build_function_type_list (V4SF_type_node,
15319 V4SF_type_node, V4SF_type_node,
15320 integer_type_node, NULL_TREE);
15321 tree v2si_ftype_v4hi_v4hi
15322 = build_function_type_list (V2SI_type_node,
15323 V4HI_type_node, V4HI_type_node, NULL_TREE);
15324 tree v4hi_ftype_v4hi_int
15325 = build_function_type_list (V4HI_type_node,
15326 V4HI_type_node, integer_type_node, NULL_TREE);
15327 tree v4hi_ftype_v4hi_di
15328 = build_function_type_list (V4HI_type_node,
15329 V4HI_type_node, long_long_unsigned_type_node,
15330 NULL_TREE);
15331 tree v2si_ftype_v2si_di
15332 = build_function_type_list (V2SI_type_node,
15333 V2SI_type_node, long_long_unsigned_type_node,
15334 NULL_TREE);
15335 tree void_ftype_void
15336 = build_function_type (void_type_node, void_list_node);
15337 tree void_ftype_unsigned
15338 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15339 tree void_ftype_unsigned_unsigned
15340 = build_function_type_list (void_type_node, unsigned_type_node,
15341 unsigned_type_node, NULL_TREE);
15342 tree void_ftype_pcvoid_unsigned_unsigned
15343 = build_function_type_list (void_type_node, const_ptr_type_node,
15344 unsigned_type_node, unsigned_type_node,
15345 NULL_TREE);
15346 tree unsigned_ftype_void
15347 = build_function_type (unsigned_type_node, void_list_node);
15348 tree v2si_ftype_v4sf
15349 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15350 /* Loads/stores. */
15351 tree void_ftype_v8qi_v8qi_pchar
15352 = build_function_type_list (void_type_node,
15353 V8QI_type_node, V8QI_type_node,
15354 pchar_type_node, NULL_TREE);
15355 tree v4sf_ftype_pcfloat
15356 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15357 /* @@@ the type is bogus */
15358 tree v4sf_ftype_v4sf_pv2si
15359 = build_function_type_list (V4SF_type_node,
15360 V4SF_type_node, pv2si_type_node, NULL_TREE);
15361 tree void_ftype_pv2si_v4sf
15362 = build_function_type_list (void_type_node,
15363 pv2si_type_node, V4SF_type_node, NULL_TREE);
15364 tree void_ftype_pfloat_v4sf
15365 = build_function_type_list (void_type_node,
15366 pfloat_type_node, V4SF_type_node, NULL_TREE);
15367 tree void_ftype_pdi_di
15368 = build_function_type_list (void_type_node,
15369 pdi_type_node, long_long_unsigned_type_node,
15370 NULL_TREE);
15371 tree void_ftype_pv2di_v2di
15372 = build_function_type_list (void_type_node,
15373 pv2di_type_node, V2DI_type_node, NULL_TREE);
15374 /* Normal vector unops. */
15375 tree v4sf_ftype_v4sf
15376 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15377 tree v16qi_ftype_v16qi
15378 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15379 tree v8hi_ftype_v8hi
15380 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15381 tree v4si_ftype_v4si
15382 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15383 tree v8qi_ftype_v8qi
15384 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15385 tree v4hi_ftype_v4hi
15386 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15388 /* Normal vector binops. */
15389 tree v4sf_ftype_v4sf_v4sf
15390 = build_function_type_list (V4SF_type_node,
15391 V4SF_type_node, V4SF_type_node, NULL_TREE);
15392 tree v8qi_ftype_v8qi_v8qi
15393 = build_function_type_list (V8QI_type_node,
15394 V8QI_type_node, V8QI_type_node, NULL_TREE);
15395 tree v4hi_ftype_v4hi_v4hi
15396 = build_function_type_list (V4HI_type_node,
15397 V4HI_type_node, V4HI_type_node, NULL_TREE);
15398 tree v2si_ftype_v2si_v2si
15399 = build_function_type_list (V2SI_type_node,
15400 V2SI_type_node, V2SI_type_node, NULL_TREE);
15401 tree di_ftype_di_di
15402 = build_function_type_list (long_long_unsigned_type_node,
15403 long_long_unsigned_type_node,
15404 long_long_unsigned_type_node, NULL_TREE);
15406 tree di_ftype_di_di_int
15407 = build_function_type_list (long_long_unsigned_type_node,
15408 long_long_unsigned_type_node,
15409 long_long_unsigned_type_node,
15410 integer_type_node, NULL_TREE);
15412 tree v2si_ftype_v2sf
15413 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15414 tree v2sf_ftype_v2si
15415 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15416 tree v2si_ftype_v2si
15417 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15418 tree v2sf_ftype_v2sf
15419 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15420 tree v2sf_ftype_v2sf_v2sf
15421 = build_function_type_list (V2SF_type_node,
15422 V2SF_type_node, V2SF_type_node, NULL_TREE);
15423 tree v2si_ftype_v2sf_v2sf
15424 = build_function_type_list (V2SI_type_node,
15425 V2SF_type_node, V2SF_type_node, NULL_TREE);
15426 tree pint_type_node = build_pointer_type (integer_type_node);
15427 tree pdouble_type_node = build_pointer_type (double_type_node);
15428 tree pcdouble_type_node = build_pointer_type (
15429 build_type_variant (double_type_node, 1, 0));
15430 tree int_ftype_v2df_v2df
15431 = build_function_type_list (integer_type_node,
15432 V2DF_type_node, V2DF_type_node, NULL_TREE);
15434 tree void_ftype_pcvoid
15435 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15436 tree v4sf_ftype_v4si
15437 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15438 tree v4si_ftype_v4sf
15439 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15440 tree v2df_ftype_v4si
15441 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15442 tree v4si_ftype_v2df
15443 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15444 tree v2si_ftype_v2df
15445 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15446 tree v4sf_ftype_v2df
15447 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15448 tree v2df_ftype_v2si
15449 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15450 tree v2df_ftype_v4sf
15451 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15452 tree int_ftype_v2df
15453 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15454 tree int64_ftype_v2df
15455 = build_function_type_list (long_long_integer_type_node,
15456 V2DF_type_node, NULL_TREE);
15457 tree v2df_ftype_v2df_int
15458 = build_function_type_list (V2DF_type_node,
15459 V2DF_type_node, integer_type_node, NULL_TREE);
15460 tree v2df_ftype_v2df_int64
15461 = build_function_type_list (V2DF_type_node,
15462 V2DF_type_node, long_long_integer_type_node,
15463 NULL_TREE);
15464 tree v4sf_ftype_v4sf_v2df
15465 = build_function_type_list (V4SF_type_node,
15466 V4SF_type_node, V2DF_type_node, NULL_TREE);
15467 tree v2df_ftype_v2df_v4sf
15468 = build_function_type_list (V2DF_type_node,
15469 V2DF_type_node, V4SF_type_node, NULL_TREE);
15470 tree v2df_ftype_v2df_v2df_int
15471 = build_function_type_list (V2DF_type_node,
15472 V2DF_type_node, V2DF_type_node,
15473 integer_type_node,
15474 NULL_TREE);
15475 tree v2df_ftype_v2df_pcdouble
15476 = build_function_type_list (V2DF_type_node,
15477 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15478 tree void_ftype_pdouble_v2df
15479 = build_function_type_list (void_type_node,
15480 pdouble_type_node, V2DF_type_node, NULL_TREE);
15481 tree void_ftype_pint_int
15482 = build_function_type_list (void_type_node,
15483 pint_type_node, integer_type_node, NULL_TREE);
15484 tree void_ftype_v16qi_v16qi_pchar
15485 = build_function_type_list (void_type_node,
15486 V16QI_type_node, V16QI_type_node,
15487 pchar_type_node, NULL_TREE);
15488 tree v2df_ftype_pcdouble
15489 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15490 tree v2df_ftype_v2df_v2df
15491 = build_function_type_list (V2DF_type_node,
15492 V2DF_type_node, V2DF_type_node, NULL_TREE);
15493 tree v16qi_ftype_v16qi_v16qi
15494 = build_function_type_list (V16QI_type_node,
15495 V16QI_type_node, V16QI_type_node, NULL_TREE);
15496 tree v8hi_ftype_v8hi_v8hi
15497 = build_function_type_list (V8HI_type_node,
15498 V8HI_type_node, V8HI_type_node, NULL_TREE);
15499 tree v4si_ftype_v4si_v4si
15500 = build_function_type_list (V4SI_type_node,
15501 V4SI_type_node, V4SI_type_node, NULL_TREE);
15502 tree v2di_ftype_v2di_v2di
15503 = build_function_type_list (V2DI_type_node,
15504 V2DI_type_node, V2DI_type_node, NULL_TREE);
15505 tree v2di_ftype_v2df_v2df
15506 = build_function_type_list (V2DI_type_node,
15507 V2DF_type_node, V2DF_type_node, NULL_TREE);
15508 tree v2df_ftype_v2df
15509 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15510 tree v2di_ftype_v2di_int
15511 = build_function_type_list (V2DI_type_node,
15512 V2DI_type_node, integer_type_node, NULL_TREE);
15513 tree v2di_ftype_v2di_v2di_int
15514 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15515 V2DI_type_node, integer_type_node, NULL_TREE);
15516 tree v4si_ftype_v4si_int
15517 = build_function_type_list (V4SI_type_node,
15518 V4SI_type_node, integer_type_node, NULL_TREE);
15519 tree v8hi_ftype_v8hi_int
15520 = build_function_type_list (V8HI_type_node,
15521 V8HI_type_node, integer_type_node, NULL_TREE);
15522 tree v8hi_ftype_v8hi_v2di
15523 = build_function_type_list (V8HI_type_node,
15524 V8HI_type_node, V2DI_type_node, NULL_TREE);
15525 tree v4si_ftype_v4si_v2di
15526 = build_function_type_list (V4SI_type_node,
15527 V4SI_type_node, V2DI_type_node, NULL_TREE);
15528 tree v4si_ftype_v8hi_v8hi
15529 = build_function_type_list (V4SI_type_node,
15530 V8HI_type_node, V8HI_type_node, NULL_TREE);
15531 tree di_ftype_v8qi_v8qi
15532 = build_function_type_list (long_long_unsigned_type_node,
15533 V8QI_type_node, V8QI_type_node, NULL_TREE);
15534 tree di_ftype_v2si_v2si
15535 = build_function_type_list (long_long_unsigned_type_node,
15536 V2SI_type_node, V2SI_type_node, NULL_TREE);
15537 tree v2di_ftype_v16qi_v16qi
15538 = build_function_type_list (V2DI_type_node,
15539 V16QI_type_node, V16QI_type_node, NULL_TREE);
15540 tree v2di_ftype_v4si_v4si
15541 = build_function_type_list (V2DI_type_node,
15542 V4SI_type_node, V4SI_type_node, NULL_TREE);
15543 tree int_ftype_v16qi
15544 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15545 tree v16qi_ftype_pcchar
15546 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15547 tree void_ftype_pchar_v16qi
15548 = build_function_type_list (void_type_node,
15549 pchar_type_node, V16QI_type_node, NULL_TREE);
15551 tree float80_type;
15552 tree float128_type;
15553 tree ftype;
15555 /* The __float80 type. */
15556 if (TYPE_MODE (long_double_type_node) == XFmode)
15557 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15558 "__float80");
15559 else
15561 /* The __float80 type. */
15562 float80_type = make_node (REAL_TYPE);
15563 TYPE_PRECISION (float80_type) = 80;
15564 layout_type (float80_type);
15565 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15568 if (TARGET_64BIT)
15570 float128_type = make_node (REAL_TYPE);
15571 TYPE_PRECISION (float128_type) = 128;
15572 layout_type (float128_type);
15573 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
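  /* Illustration of the type registrations above (an assumption about user
     code; nothing below is defined in this file) -- once registered, the
     names are usable directly in C:

	__float80  e = 1.0L;	-- 80-bit extended precision
	__float128 q;		-- only registered on 64-bit targets

     On targets whose long double is already XFmode, __float80 is simply
     another name for long double.  */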
15576 /* Add all builtins that are more or less simple operations on two
15577 operands. */
15578 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15580 /* Use one of the operands; the target can have a different mode for
15581 mask-generating compares. */
15582 enum machine_mode mode;
15583 tree type;
15585 if (d->name == 0)
15586 continue;
15587 mode = insn_data[d->icode].operand[1].mode;
15589 switch (mode)
15591 case V16QImode:
15592 type = v16qi_ftype_v16qi_v16qi;
15593 break;
15594 case V8HImode:
15595 type = v8hi_ftype_v8hi_v8hi;
15596 break;
15597 case V4SImode:
15598 type = v4si_ftype_v4si_v4si;
15599 break;
15600 case V2DImode:
15601 type = v2di_ftype_v2di_v2di;
15602 break;
15603 case V2DFmode:
15604 type = v2df_ftype_v2df_v2df;
15605 break;
15606 case V4SFmode:
15607 type = v4sf_ftype_v4sf_v4sf;
15608 break;
15609 case V8QImode:
15610 type = v8qi_ftype_v8qi_v8qi;
15611 break;
15612 case V4HImode:
15613 type = v4hi_ftype_v4hi_v4hi;
15614 break;
15615 case V2SImode:
15616 type = v2si_ftype_v2si_v2si;
15617 break;
15618 case DImode:
15619 type = di_ftype_di_di;
15620 break;
15622 default:
15623 gcc_unreachable ();
15626 /* Override for comparisons. */
15627 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15628 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15629 type = v4si_ftype_v4sf_v4sf;
15631 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15632 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15633 type = v2di_ftype_v2df_v2df;
15635 def_builtin (d->mask, d->name, type, d->code);
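  /* Worked example of the loop above: "__builtin_ia32_addpd" is described by
     CODE_FOR_addv2df3, whose operand 1 has mode V2DFmode, so it is registered
     with the v2df_ftype_v2df_v2df signature.  The override just above gives
     the packed compare builtins (sse2_maskcmpv2df3) the v2di_ftype_v2df_v2df
     signature instead, since they produce an all-ones/all-zeros mask rather
     than a double vector.  */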
15638 /* Add all builtins that are more or less simple operations on 1 operand. */
15639 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15641 enum machine_mode mode;
15642 tree type;
15644 if (d->name == 0)
15645 continue;
15646 mode = insn_data[d->icode].operand[1].mode;
15648 switch (mode)
15650 case V16QImode:
15651 type = v16qi_ftype_v16qi;
15652 break;
15653 case V8HImode:
15654 type = v8hi_ftype_v8hi;
15655 break;
15656 case V4SImode:
15657 type = v4si_ftype_v4si;
15658 break;
15659 case V2DFmode:
15660 type = v2df_ftype_v2df;
15661 break;
15662 case V4SFmode:
15663 type = v4sf_ftype_v4sf;
15664 break;
15665 case V8QImode:
15666 type = v8qi_ftype_v8qi;
15667 break;
15668 case V4HImode:
15669 type = v4hi_ftype_v4hi;
15670 break;
15671 case V2SImode:
15672 type = v2si_ftype_v2si;
15673 break;
15675 default:
15676 gcc_unreachable ();
15679 def_builtin (d->mask, d->name, type, d->code);
15682 /* Add the remaining MMX insns with somewhat more complicated types. */
15683 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15684 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15685 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15686 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15688 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15689 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15690 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15692 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15693 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15695 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15696 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15698 /* comi/ucomi insns. */
15699 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15700 if (d->mask == MASK_SSE2)
15701 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15702 else
15703 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15705 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15706 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15707 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15709 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15710 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15711 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15712 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15713 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15714 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15715 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15716 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15717 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15718 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15719 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15721 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15723 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15724 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15726 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15727 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15728 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15729 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15731 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15732 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15733 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15734 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15736 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15738 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15740 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15741 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15742 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15743 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15744 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15745 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15747 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15749 /* Original 3DNow! */
15750 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15751 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15752 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15753 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15754 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15755 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15756 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15757 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15758 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15759 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15760 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15761 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15762 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15763 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15764 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15765 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15766 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15767 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15768 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15769 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15771 /* 3DNow! extension as used in the Athlon CPU. */
15772 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15773 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15774 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15775 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15776 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15777 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15779 /* SSE2 */
15780 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15782 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15783 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15785 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15786 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15788 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15789 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15790 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15791 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15792 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15794 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15795 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15796 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15797 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15799 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15800 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15802 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15804 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15805 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15807 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15808 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15809 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15810 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15811 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15813 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15815 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15816 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15817 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15818 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15820 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15821 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15822 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15824 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15825 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15826 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15827 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15829 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15830 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15831 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15833 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15834 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15836 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15837 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15839 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15840 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15841 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15843 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15844 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15845 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15847 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15848 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15850 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15851 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15852 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15853 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15855 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15856 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15857 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15858 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15860 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15861 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15863 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15865 /* Prescott New Instructions. */
15866 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15867 void_ftype_pcvoid_unsigned_unsigned,
15868 IX86_BUILTIN_MONITOR);
15869 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15870 void_ftype_unsigned_unsigned,
15871 IX86_BUILTIN_MWAIT);
15872 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15873 v4sf_ftype_v4sf,
15874 IX86_BUILTIN_MOVSHDUP);
15875 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15876 v4sf_ftype_v4sf,
15877 IX86_BUILTIN_MOVSLDUP);
15878 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15879 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
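  /* Illustrative use of the monitor/mwait pair registered above (the variable
     is hypothetical, only the builtins come from this file):

	char flag;
	__builtin_ia32_monitor (&flag, 0, 0);
	__builtin_ia32_mwait (0, 0);

     matching the void (const void *, unsigned, unsigned) and
     void (unsigned, unsigned) prototypes built just above.  */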
15881 /* SSSE3. */
15882 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15883 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15884 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15885 IX86_BUILTIN_PALIGNR);
15887 /* Access to the vec_init patterns. */
15888 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15889 integer_type_node, NULL_TREE);
15890 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15891 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15893 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15894 short_integer_type_node,
15895 short_integer_type_node,
15896 short_integer_type_node, NULL_TREE);
15897 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15898 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15900 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15901 char_type_node, char_type_node,
15902 char_type_node, char_type_node,
15903 char_type_node, char_type_node,
15904 char_type_node, NULL_TREE);
15905 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15906 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15908 /* Access to the vec_extract patterns. */
15909 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15910 integer_type_node, NULL_TREE);
15911 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15912 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15914 ftype = build_function_type_list (long_long_integer_type_node,
15915 V2DI_type_node, integer_type_node,
15916 NULL_TREE);
15917 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15918 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15920 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15921 integer_type_node, NULL_TREE);
15922 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15923 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15925 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15926 integer_type_node, NULL_TREE);
15927 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15928 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15930 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15931 integer_type_node, NULL_TREE);
15932 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15933 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15935 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15936 integer_type_node, NULL_TREE);
15937 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15938 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15940 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15941 integer_type_node, NULL_TREE);
15942 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15943 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15945 /* Access to the vec_set patterns. */
15946 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15947 intHI_type_node,
15948 integer_type_node, NULL_TREE);
15949 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15950 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15952 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15953 intHI_type_node,
15954 integer_type_node, NULL_TREE);
15955 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15956 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15959 /* Errors in the source file can cause expand_expr to return const0_rtx
15960 where we expect a vector. To avoid crashing, use one of the vector
15961 clear instructions. */
15962 static rtx
15963 safe_vector_operand (rtx x, enum machine_mode mode)
15965 if (x == const0_rtx)
15966 x = CONST0_RTX (mode);
15967 return x;
15970 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15972 static rtx
15973 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15975 rtx pat, xops[3];
15976 tree arg0 = TREE_VALUE (arglist);
15977 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15978 rtx op0 = expand_normal (arg0);
15979 rtx op1 = expand_normal (arg1);
15980 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15981 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15982 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15984 if (VECTOR_MODE_P (mode0))
15985 op0 = safe_vector_operand (op0, mode0);
15986 if (VECTOR_MODE_P (mode1))
15987 op1 = safe_vector_operand (op1, mode1);
15989 if (optimize || !target
15990 || GET_MODE (target) != tmode
15991 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15992 target = gen_reg_rtx (tmode);
15994 if (GET_MODE (op1) == SImode && mode1 == TImode)
15996 rtx x = gen_reg_rtx (V4SImode);
15997 emit_insn (gen_sse2_loadd (x, op1));
15998 op1 = gen_lowpart (TImode, x);
16001 /* The insn must want input operands in the same modes as the
16002 result. */
16003 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16004 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16006 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16007 op0 = copy_to_mode_reg (mode0, op0);
16008 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16009 op1 = copy_to_mode_reg (mode1, op1);
16011 /* ??? Using ix86_fixup_binary_operands is problematic when
16012 we've got mismatched modes. Fake it. */
16014 xops[0] = target;
16015 xops[1] = op0;
16016 xops[2] = op1;
16018 if (tmode == mode0 && tmode == mode1)
16020 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16021 op0 = xops[1];
16022 op1 = xops[2];
16024 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16026 op0 = force_reg (mode0, op0);
16027 op1 = force_reg (mode1, op1);
16028 target = gen_reg_rtx (tmode);
16031 pat = GEN_FCN (icode) (target, op0, op1);
16032 if (! pat)
16033 return 0;
16034 emit_insn (pat);
16035 return target;
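/* Rough sketch of how this helper is reached (the dispatch itself lives in
   ix86_expand_builtin, outside this excerpt); it simply walks bdesc_2arg
   looking for the builtin's code:

     for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
       if (d->code == fcode)
	 return ix86_expand_binop_builtin (d->icode, arglist, target);
*/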
16038 /* Subroutine of ix86_expand_builtin to take care of stores. */
16040 static rtx
16041 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16043 rtx pat;
16044 tree arg0 = TREE_VALUE (arglist);
16045 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16046 rtx op0 = expand_normal (arg0);
16047 rtx op1 = expand_normal (arg1);
16048 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16049 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16051 if (VECTOR_MODE_P (mode1))
16052 op1 = safe_vector_operand (op1, mode1);
16054 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16055 op1 = copy_to_mode_reg (mode1, op1);
16057 pat = GEN_FCN (icode) (op0, op1);
16058 if (pat)
16059 emit_insn (pat);
16060 return 0;
16063 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16065 static rtx
16066 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16067 rtx target, int do_load)
16069 rtx pat;
16070 tree arg0 = TREE_VALUE (arglist);
16071 rtx op0 = expand_normal (arg0);
16072 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16073 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16075 if (optimize || !target
16076 || GET_MODE (target) != tmode
16077 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16078 target = gen_reg_rtx (tmode);
16079 if (do_load)
16080 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16081 else
16083 if (VECTOR_MODE_P (mode0))
16084 op0 = safe_vector_operand (op0, mode0);
16086 if ((optimize && !register_operand (op0, mode0))
16087 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16088 op0 = copy_to_mode_reg (mode0, op0);
16091 pat = GEN_FCN (icode) (target, op0);
16092 if (! pat)
16093 return 0;
16094 emit_insn (pat);
16095 return target;
16098 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16099 sqrtss, rsqrtss, rcpss. */
16101 static rtx
16102 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16104 rtx pat;
16105 tree arg0 = TREE_VALUE (arglist);
16106 rtx op1, op0 = expand_normal (arg0);
16107 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16108 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16110 if (optimize || !target
16111 || GET_MODE (target) != tmode
16112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16113 target = gen_reg_rtx (tmode);
16115 if (VECTOR_MODE_P (mode0))
16116 op0 = safe_vector_operand (op0, mode0);
16118 if ((optimize && !register_operand (op0, mode0))
16119 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16120 op0 = copy_to_mode_reg (mode0, op0);
16122 op1 = op0;
16123 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16124 op1 = copy_to_mode_reg (mode0, op1);
16126 pat = GEN_FCN (icode) (target, op0, op1);
16127 if (! pat)
16128 return 0;
16129 emit_insn (pat);
16130 return target;
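/* The operand is passed twice above because the scalar (vm) patterns for
   sqrtss/rsqrtss/rcpss take two vector inputs: one supplies the value whose
   low element is computed, the other supplies the upper elements that are
   copied through unchanged.  Using the same register for both matches the
   intrinsics' behaviour of leaving the upper three floats of the argument
   untouched.  */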
16133 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16135 static rtx
16136 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16137 rtx target)
16139 rtx pat;
16140 tree arg0 = TREE_VALUE (arglist);
16141 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16142 rtx op0 = expand_normal (arg0);
16143 rtx op1 = expand_normal (arg1);
16144 rtx op2;
16145 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16146 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16147 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16148 enum rtx_code comparison = d->comparison;
16150 if (VECTOR_MODE_P (mode0))
16151 op0 = safe_vector_operand (op0, mode0);
16152 if (VECTOR_MODE_P (mode1))
16153 op1 = safe_vector_operand (op1, mode1);
16155 /* Swap operands if we have a comparison that isn't available in
16156 hardware. */
16157 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16159 rtx tmp = gen_reg_rtx (mode1);
16160 emit_move_insn (tmp, op1);
16161 op1 = op0;
16162 op0 = tmp;
16165 if (optimize || !target
16166 || GET_MODE (target) != tmode
16167 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16168 target = gen_reg_rtx (tmode);
16170 if ((optimize && !register_operand (op0, mode0))
16171 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16172 op0 = copy_to_mode_reg (mode0, op0);
16173 if ((optimize && !register_operand (op1, mode1))
16174 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16175 op1 = copy_to_mode_reg (mode1, op1);
16177 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16178 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16179 if (! pat)
16180 return 0;
16181 emit_insn (pat);
16182 return target;
16185 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16187 static rtx
16188 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16189 rtx target)
16191 rtx pat;
16192 tree arg0 = TREE_VALUE (arglist);
16193 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16194 rtx op0 = expand_normal (arg0);
16195 rtx op1 = expand_normal (arg1);
16196 rtx op2;
16197 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16198 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16199 enum rtx_code comparison = d->comparison;
16201 if (VECTOR_MODE_P (mode0))
16202 op0 = safe_vector_operand (op0, mode0);
16203 if (VECTOR_MODE_P (mode1))
16204 op1 = safe_vector_operand (op1, mode1);
16206 /* Swap operands if we have a comparison that isn't available in
16207 hardware. */
16208 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16210 rtx tmp = op1;
16211 op1 = op0;
16212 op0 = tmp;
16215 target = gen_reg_rtx (SImode);
16216 emit_move_insn (target, const0_rtx);
16217 target = gen_rtx_SUBREG (QImode, target, 0);
16219 if ((optimize && !register_operand (op0, mode0))
16220 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16221 op0 = copy_to_mode_reg (mode0, op0);
16222 if ((optimize && !register_operand (op1, mode1))
16223 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16224 op1 = copy_to_mode_reg (mode1, op1);
16226 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16227 pat = GEN_FCN (d->icode) (op0, op1);
16228 if (! pat)
16229 return 0;
16230 emit_insn (pat);
16231 emit_insn (gen_rtx_SET (VOIDmode,
16232 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16233 gen_rtx_fmt_ee (comparison, QImode,
16234 SET_DEST (pat),
16235 const0_rtx)));
16237 return SUBREG_REG (target);
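/* For illustration (assuming the usual xmmintrin.h wrapper): a call such
   as

       int r = _mm_comieq_ss (a, b);

   goes through this expander; the comi insn only sets the flags, so the
   result is materialized roughly as

       comiss  %xmm1, %xmm0
       sete    %al

   with the surrounding SImode register cleared beforehand by the
   emit_move_insn (target, const0_rtx) above and returned via
   SUBREG_REG.  */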
16240 /* Return the integer constant in ARG. Constrain it to be in the range
16241 of the subparts of VEC_TYPE; issue an error if not. */
16243 static int
16244 get_element_number (tree vec_type, tree arg)
16246 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16248 if (!host_integerp (arg, 1)
16249 || (elt = tree_low_cst (arg, 1), elt > max))
16251 error ("selector must be an integer constant in the range 0..%wi", max);
16252 return 0;
16255 return elt;
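/* Example: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so MAX is 3; a
   selector of 2 is returned as-is, while a selector of 4, or one that is
   not a compile-time constant, triggers the error above and element 0 is
   used instead.  */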
16258 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16259 ix86_expand_vector_init. We DO have language-level syntax for this, in
16260 the form of (type){ init-list }. Except that since we can't place emms
16261 instructions from inside the compiler, we can't allow the use of MMX
16262 registers unless the user explicitly asks for it. So we do *not* define
16263 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16264 we have builtins, invoked via mmintrin.h, that give us license to emit
16265 these sorts of instructions. */
16267 static rtx
16268 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16270 enum machine_mode tmode = TYPE_MODE (type);
16271 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16272 int i, n_elt = GET_MODE_NUNITS (tmode);
16273 rtvec v = rtvec_alloc (n_elt);
16275 gcc_assert (VECTOR_MODE_P (tmode));
16277 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16279 rtx x = expand_normal (TREE_VALUE (arglist));
16280 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16283 gcc_assert (arglist == NULL);
16285 if (!target || !register_operand (target, tmode))
16286 target = gen_reg_rtx (tmode);
16288 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16289 return target;
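/* For illustration (assuming the usual mmintrin.h wrapper): user code
   such as

       __m64 v = _mm_set_pi32 (hi, lo);

   reaches this expander via __builtin_ia32_vec_init_v2si; each SImode
   argument becomes one element of the rtvec built above and
   ix86_expand_vector_init then emits the MMX register build, so no
   vec_init pattern is needed in mmx.md.  */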
16292 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16293 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16294 had a language-level syntax for referencing vector elements. */
16296 static rtx
16297 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16299 enum machine_mode tmode, mode0;
16300 tree arg0, arg1;
16301 int elt;
16302 rtx op0;
16304 arg0 = TREE_VALUE (arglist);
16305 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16307 op0 = expand_normal (arg0);
16308 elt = get_element_number (TREE_TYPE (arg0), arg1);
16310 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16311 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16312 gcc_assert (VECTOR_MODE_P (mode0));
16314 op0 = force_reg (mode0, op0);
16316 if (optimize || !target || !register_operand (target, tmode))
16317 target = gen_reg_rtx (tmode);
16319 ix86_expand_vector_extract (true, target, op0, elt);
16321 return target;
16324 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16325 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16326 a language-level syntax for referencing vector elements. */
16328 static rtx
16329 ix86_expand_vec_set_builtin (tree arglist)
16331 enum machine_mode tmode, mode1;
16332 tree arg0, arg1, arg2;
16333 int elt;
16334 rtx op0, op1;
16336 arg0 = TREE_VALUE (arglist);
16337 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16338 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16340 tmode = TYPE_MODE (TREE_TYPE (arg0));
16341 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16342 gcc_assert (VECTOR_MODE_P (tmode));
16344 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16345 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16346 elt = get_element_number (TREE_TYPE (arg0), arg2);
16348 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16349 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16351 op0 = force_reg (tmode, op0);
16352 op1 = force_reg (mode1, op1);
16354 ix86_expand_vector_set (true, op0, op1, elt);
16356 return op0;
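/* For illustration (assuming the usual xmmintrin.h wrapper): a call such
   as

       __m64 v2 = _mm_insert_pi16 (v, val, 2);

   arrives here with ARG0 the vector, ARG1 the replacement element
   (narrowed to MODE1 by convert_modes above if it was expanded in a wider
   mode) and ARG2 the element number validated by get_element_number.  */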
16359 /* Expand an expression EXP that calls a built-in function,
16360 with result going to TARGET if that's convenient
16361 (and in mode MODE if that's convenient).
16362 SUBTARGET may be used as the target for computing one of EXP's operands.
16363 IGNORE is nonzero if the value is to be ignored. */
16365 static rtx
16366 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16367 enum machine_mode mode ATTRIBUTE_UNUSED,
16368 int ignore ATTRIBUTE_UNUSED)
16370 const struct builtin_description *d;
16371 size_t i;
16372 enum insn_code icode;
16373 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16374 tree arglist = TREE_OPERAND (exp, 1);
16375 tree arg0, arg1, arg2;
16376 rtx op0, op1, op2, pat;
16377 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16378 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16380 switch (fcode)
16382 case IX86_BUILTIN_EMMS:
16383 emit_insn (gen_mmx_emms ());
16384 return 0;
16386 case IX86_BUILTIN_SFENCE:
16387 emit_insn (gen_sse_sfence ());
16388 return 0;
16390 case IX86_BUILTIN_MASKMOVQ:
16391 case IX86_BUILTIN_MASKMOVDQU:
16392 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16393 ? CODE_FOR_mmx_maskmovq
16394 : CODE_FOR_sse2_maskmovdqu);
16395 /* Note the arg order is different from the operand order. */
16396 arg1 = TREE_VALUE (arglist);
16397 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16398 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16399 op0 = expand_normal (arg0);
16400 op1 = expand_normal (arg1);
16401 op2 = expand_normal (arg2);
16402 mode0 = insn_data[icode].operand[0].mode;
16403 mode1 = insn_data[icode].operand[1].mode;
16404 mode2 = insn_data[icode].operand[2].mode;
16406 op0 = force_reg (Pmode, op0);
16407 op0 = gen_rtx_MEM (mode1, op0);
16409 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16410 op0 = copy_to_mode_reg (mode0, op0);
16411 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16412 op1 = copy_to_mode_reg (mode1, op1);
16413 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16414 op2 = copy_to_mode_reg (mode2, op2);
16415 pat = GEN_FCN (icode) (op0, op1, op2);
16416 if (! pat)
16417 return 0;
16418 emit_insn (pat);
16419 return 0;
16421 case IX86_BUILTIN_SQRTSS:
16422 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16423 case IX86_BUILTIN_RSQRTSS:
16424 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16425 case IX86_BUILTIN_RCPSS:
16426 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16428 case IX86_BUILTIN_LOADUPS:
16429 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16431 case IX86_BUILTIN_STOREUPS:
16432 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16434 case IX86_BUILTIN_LOADHPS:
16435 case IX86_BUILTIN_LOADLPS:
16436 case IX86_BUILTIN_LOADHPD:
16437 case IX86_BUILTIN_LOADLPD:
16438 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16439 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16440 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16441 : CODE_FOR_sse2_loadlpd);
16442 arg0 = TREE_VALUE (arglist);
16443 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16444 op0 = expand_normal (arg0);
16445 op1 = expand_normal (arg1);
16446 tmode = insn_data[icode].operand[0].mode;
16447 mode0 = insn_data[icode].operand[1].mode;
16448 mode1 = insn_data[icode].operand[2].mode;
16450 op0 = force_reg (mode0, op0);
16451 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16452 if (optimize || target == 0
16453 || GET_MODE (target) != tmode
16454 || !register_operand (target, tmode))
16455 target = gen_reg_rtx (tmode);
16456 pat = GEN_FCN (icode) (target, op0, op1);
16457 if (! pat)
16458 return 0;
16459 emit_insn (pat);
16460 return target;
16462 case IX86_BUILTIN_STOREHPS:
16463 case IX86_BUILTIN_STORELPS:
16464 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16465 : CODE_FOR_sse_storelps);
16466 arg0 = TREE_VALUE (arglist);
16467 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16468 op0 = expand_normal (arg0);
16469 op1 = expand_normal (arg1);
16470 mode0 = insn_data[icode].operand[0].mode;
16471 mode1 = insn_data[icode].operand[1].mode;
16473 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16474 op1 = force_reg (mode1, op1);
16476 pat = GEN_FCN (icode) (op0, op1);
16477 if (! pat)
16478 return 0;
16479 emit_insn (pat);
16480 return const0_rtx;
16482 case IX86_BUILTIN_MOVNTPS:
16483 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16484 case IX86_BUILTIN_MOVNTQ:
16485 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16487 case IX86_BUILTIN_LDMXCSR:
16488 op0 = expand_normal (TREE_VALUE (arglist));
16489 target = assign_386_stack_local (SImode, SLOT_TEMP);
16490 emit_move_insn (target, op0);
16491 emit_insn (gen_sse_ldmxcsr (target));
16492 return 0;
16494 case IX86_BUILTIN_STMXCSR:
16495 target = assign_386_stack_local (SImode, SLOT_TEMP);
16496 emit_insn (gen_sse_stmxcsr (target));
16497 return copy_to_mode_reg (SImode, target);
16499 case IX86_BUILTIN_SHUFPS:
16500 case IX86_BUILTIN_SHUFPD:
16501 icode = (fcode == IX86_BUILTIN_SHUFPS
16502 ? CODE_FOR_sse_shufps
16503 : CODE_FOR_sse2_shufpd);
16504 arg0 = TREE_VALUE (arglist);
16505 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16506 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16507 op0 = expand_normal (arg0);
16508 op1 = expand_normal (arg1);
16509 op2 = expand_normal (arg2);
16510 tmode = insn_data[icode].operand[0].mode;
16511 mode0 = insn_data[icode].operand[1].mode;
16512 mode1 = insn_data[icode].operand[2].mode;
16513 mode2 = insn_data[icode].operand[3].mode;
16515 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16516 op0 = copy_to_mode_reg (mode0, op0);
16517 if ((optimize && !register_operand (op1, mode1))
16518 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16519 op1 = copy_to_mode_reg (mode1, op1);
16520 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16522 /* @@@ better error message */
16523 error ("mask must be an immediate");
16524 return gen_reg_rtx (tmode);
16526 if (optimize || target == 0
16527 || GET_MODE (target) != tmode
16528 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16529 target = gen_reg_rtx (tmode);
16530 pat = GEN_FCN (icode) (target, op0, op1, op2);
16531 if (! pat)
16532 return 0;
16533 emit_insn (pat);
16534 return target;
16536 case IX86_BUILTIN_PSHUFW:
16537 case IX86_BUILTIN_PSHUFD:
16538 case IX86_BUILTIN_PSHUFHW:
16539 case IX86_BUILTIN_PSHUFLW:
16540 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16541 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16542 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16543 : CODE_FOR_mmx_pshufw);
16544 arg0 = TREE_VALUE (arglist);
16545 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16546 op0 = expand_normal (arg0);
16547 op1 = expand_normal (arg1);
16548 tmode = insn_data[icode].operand[0].mode;
16549 mode1 = insn_data[icode].operand[1].mode;
16550 mode2 = insn_data[icode].operand[2].mode;
16552 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16553 op0 = copy_to_mode_reg (mode1, op0);
16554 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16556 /* @@@ better error message */
16557 error ("mask must be an immediate");
16558 return const0_rtx;
16560 if (target == 0
16561 || GET_MODE (target) != tmode
16562 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16563 target = gen_reg_rtx (tmode);
16564 pat = GEN_FCN (icode) (target, op0, op1);
16565 if (! pat)
16566 return 0;
16567 emit_insn (pat);
16568 return target;
16570 case IX86_BUILTIN_PSLLDQI128:
16571 case IX86_BUILTIN_PSRLDQI128:
16572 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16573 : CODE_FOR_sse2_lshrti3);
16574 arg0 = TREE_VALUE (arglist);
16575 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16576 op0 = expand_normal (arg0);
16577 op1 = expand_normal (arg1);
16578 tmode = insn_data[icode].operand[0].mode;
16579 mode1 = insn_data[icode].operand[1].mode;
16580 mode2 = insn_data[icode].operand[2].mode;
16582 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16584 op0 = copy_to_reg (op0);
16585 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16587 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16589 error ("shift must be an immediate");
16590 return const0_rtx;
16592 target = gen_reg_rtx (V2DImode);
16593 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16594 if (! pat)
16595 return 0;
16596 emit_insn (pat);
16597 return target;
16599 case IX86_BUILTIN_FEMMS:
16600 emit_insn (gen_mmx_femms ());
16601 return NULL_RTX;
16603 case IX86_BUILTIN_PAVGUSB:
16604 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16606 case IX86_BUILTIN_PF2ID:
16607 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16609 case IX86_BUILTIN_PFACC:
16610 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16612 case IX86_BUILTIN_PFADD:
16613 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16615 case IX86_BUILTIN_PFCMPEQ:
16616 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16618 case IX86_BUILTIN_PFCMPGE:
16619 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16621 case IX86_BUILTIN_PFCMPGT:
16622 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16624 case IX86_BUILTIN_PFMAX:
16625 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16627 case IX86_BUILTIN_PFMIN:
16628 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16630 case IX86_BUILTIN_PFMUL:
16631 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16633 case IX86_BUILTIN_PFRCP:
16634 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16636 case IX86_BUILTIN_PFRCPIT1:
16637 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16639 case IX86_BUILTIN_PFRCPIT2:
16640 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16642 case IX86_BUILTIN_PFRSQIT1:
16643 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16645 case IX86_BUILTIN_PFRSQRT:
16646 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16648 case IX86_BUILTIN_PFSUB:
16649 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16651 case IX86_BUILTIN_PFSUBR:
16652 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16654 case IX86_BUILTIN_PI2FD:
16655 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16657 case IX86_BUILTIN_PMULHRW:
16658 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16660 case IX86_BUILTIN_PF2IW:
16661 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16663 case IX86_BUILTIN_PFNACC:
16664 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16666 case IX86_BUILTIN_PFPNACC:
16667 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16669 case IX86_BUILTIN_PI2FW:
16670 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16672 case IX86_BUILTIN_PSWAPDSI:
16673 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16675 case IX86_BUILTIN_PSWAPDSF:
16676 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16678 case IX86_BUILTIN_SQRTSD:
16679 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16680 case IX86_BUILTIN_LOADUPD:
16681 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16682 case IX86_BUILTIN_STOREUPD:
16683 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16685 case IX86_BUILTIN_MFENCE:
16686 emit_insn (gen_sse2_mfence ());
16687 return 0;
16688 case IX86_BUILTIN_LFENCE:
16689 emit_insn (gen_sse2_lfence ());
16690 return 0;
16692 case IX86_BUILTIN_CLFLUSH:
16693 arg0 = TREE_VALUE (arglist);
16694 op0 = expand_normal (arg0);
16695 icode = CODE_FOR_sse2_clflush;
16696 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16697 op0 = copy_to_mode_reg (Pmode, op0);
16699 emit_insn (gen_sse2_clflush (op0));
16700 return 0;
16702 case IX86_BUILTIN_MOVNTPD:
16703 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16704 case IX86_BUILTIN_MOVNTDQ:
16705 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16706 case IX86_BUILTIN_MOVNTI:
16707 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16709 case IX86_BUILTIN_LOADDQU:
16710 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16711 case IX86_BUILTIN_STOREDQU:
16712 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16714 case IX86_BUILTIN_MONITOR:
16715 arg0 = TREE_VALUE (arglist);
16716 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16717 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16718 op0 = expand_normal (arg0);
16719 op1 = expand_normal (arg1);
16720 op2 = expand_normal (arg2);
16721 if (!REG_P (op0))
16722 op0 = copy_to_mode_reg (Pmode, op0);
16723 if (!REG_P (op1))
16724 op1 = copy_to_mode_reg (SImode, op1);
16725 if (!REG_P (op2))
16726 op2 = copy_to_mode_reg (SImode, op2);
16727 if (!TARGET_64BIT)
16728 emit_insn (gen_sse3_monitor (op0, op1, op2));
16729 else
16730 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16731 return 0;
16733 case IX86_BUILTIN_MWAIT:
16734 arg0 = TREE_VALUE (arglist);
16735 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16736 op0 = expand_normal (arg0);
16737 op1 = expand_normal (arg1);
16738 if (!REG_P (op0))
16739 op0 = copy_to_mode_reg (SImode, op0);
16740 if (!REG_P (op1))
16741 op1 = copy_to_mode_reg (SImode, op1);
16742 emit_insn (gen_sse3_mwait (op0, op1));
16743 return 0;
16745 case IX86_BUILTIN_LDDQU:
16746 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16747 target, 1);
16749 case IX86_BUILTIN_PALIGNR:
16750 case IX86_BUILTIN_PALIGNR128:
16751 if (fcode == IX86_BUILTIN_PALIGNR)
16753 icode = CODE_FOR_ssse3_palignrdi;
16754 mode = DImode;
16756 else
16758 icode = CODE_FOR_ssse3_palignrti;
16759 mode = V2DImode;
16761 arg0 = TREE_VALUE (arglist);
16762 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16763 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16764 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16765 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16766 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16767 tmode = insn_data[icode].operand[0].mode;
16768 mode1 = insn_data[icode].operand[1].mode;
16769 mode2 = insn_data[icode].operand[2].mode;
16770 mode3 = insn_data[icode].operand[3].mode;
16772 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16774 op0 = copy_to_reg (op0);
16775 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16777 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16779 op1 = copy_to_reg (op1);
16780 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16782 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16784 error ("shift must be an immediate");
16785 return const0_rtx;
16787 target = gen_reg_rtx (mode);
16788 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16789 op0, op1, op2);
16790 if (! pat)
16791 return 0;
16792 emit_insn (pat);
16793 return target;
16795 case IX86_BUILTIN_VEC_INIT_V2SI:
16796 case IX86_BUILTIN_VEC_INIT_V4HI:
16797 case IX86_BUILTIN_VEC_INIT_V8QI:
16798 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16800 case IX86_BUILTIN_VEC_EXT_V2DF:
16801 case IX86_BUILTIN_VEC_EXT_V2DI:
16802 case IX86_BUILTIN_VEC_EXT_V4SF:
16803 case IX86_BUILTIN_VEC_EXT_V4SI:
16804 case IX86_BUILTIN_VEC_EXT_V8HI:
16805 case IX86_BUILTIN_VEC_EXT_V2SI:
16806 case IX86_BUILTIN_VEC_EXT_V4HI:
16807 return ix86_expand_vec_ext_builtin (arglist, target);
16809 case IX86_BUILTIN_VEC_SET_V8HI:
16810 case IX86_BUILTIN_VEC_SET_V4HI:
16811 return ix86_expand_vec_set_builtin (arglist);
16813 default:
16814 break;
16817 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16818 if (d->code == fcode)
16820 /* Compares are treated specially. */
16821 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16822 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16823 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16824 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16825 return ix86_expand_sse_compare (d, arglist, target);
16827 return ix86_expand_binop_builtin (d->icode, arglist, target);
16830 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16831 if (d->code == fcode)
16832 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16834 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16835 if (d->code == fcode)
16836 return ix86_expand_sse_comi (d, arglist, target);
16838 gcc_unreachable ();
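/* For illustration: builtins that need no special handling fall through
   the switch above and are found by the table walks; a plain two-operand
   builtin such as __builtin_ia32_paddw is located in bdesc_2arg and
   expanded by ix86_expand_binop_builtin, while the four maskcmp icodes
   listed above are diverted to ix86_expand_sse_compare.  Reaching
   gcc_unreachable () means a builtin code has no table entry at all.  */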
16841 /* Store OPERAND to memory after reload is completed. This means
16842 that we can't easily use assign_stack_local. */
16844 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16846 rtx result;
16848 gcc_assert (reload_completed);
16849 if (TARGET_RED_ZONE)
16851 result = gen_rtx_MEM (mode,
16852 gen_rtx_PLUS (Pmode,
16853 stack_pointer_rtx,
16854 GEN_INT (-RED_ZONE_SIZE)));
16855 emit_move_insn (result, operand);
16857 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16859 switch (mode)
16861 case HImode:
16862 case SImode:
16863 operand = gen_lowpart (DImode, operand);
16864 /* FALLTHRU */
16865 case DImode:
16866 emit_insn (
16867 gen_rtx_SET (VOIDmode,
16868 gen_rtx_MEM (DImode,
16869 gen_rtx_PRE_DEC (DImode,
16870 stack_pointer_rtx)),
16871 operand));
16872 break;
16873 default:
16874 gcc_unreachable ();
16876 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16878 else
16880 switch (mode)
16882 case DImode:
16884 rtx operands[2];
16885 split_di (&operand, 1, operands, operands + 1);
16886 emit_insn (
16887 gen_rtx_SET (VOIDmode,
16888 gen_rtx_MEM (SImode,
16889 gen_rtx_PRE_DEC (Pmode,
16890 stack_pointer_rtx)),
16891 operands[1]));
16892 emit_insn (
16893 gen_rtx_SET (VOIDmode,
16894 gen_rtx_MEM (SImode,
16895 gen_rtx_PRE_DEC (Pmode,
16896 stack_pointer_rtx)),
16897 operands[0]));
16899 break;
16900 case HImode:
16901 /* Store HImodes as SImodes. */
16902 operand = gen_lowpart (SImode, operand);
16903 /* FALLTHRU */
16904 case SImode:
16905 emit_insn (
16906 gen_rtx_SET (VOIDmode,
16907 gen_rtx_MEM (GET_MODE (operand),
16908 gen_rtx_PRE_DEC (SImode,
16909 stack_pointer_rtx)),
16910 operand));
16911 break;
16912 default:
16913 gcc_unreachable ();
16915 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16917 return result;
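/* For illustration: with a red zone the operand is simply stored at
   sp - RED_ZONE_SIZE; otherwise an explicit push is emitted, e.g. for an
   SImode operand on a 32-bit target roughly

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI operand))

   and the returned MEM addresses the new top of stack.  The adjustment is
   undone later by ix86_free_from_memory below.  */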
16920 /* Free the operand from memory. */
16921 void
16922 ix86_free_from_memory (enum machine_mode mode)
16924 if (!TARGET_RED_ZONE)
16926 int size;
16928 if (mode == DImode || TARGET_64BIT)
16929 size = 8;
16930 else
16931 size = 4;
16932 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16933 to a pop or add instruction if registers are available. */
16934 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16935 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16936 GEN_INT (size))));
16940 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16941 QImode must go into class Q_REGS.
16942 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16943 movdf to do mem-to-mem moves through integer regs. */
16944 enum reg_class
16945 ix86_preferred_reload_class (rtx x, enum reg_class class)
16947 enum machine_mode mode = GET_MODE (x);
16949 /* We're only allowed to return a subclass of CLASS. Many of the
16950 following checks fail for NO_REGS, so eliminate that early. */
16951 if (class == NO_REGS)
16952 return NO_REGS;
16954 /* All classes can load zeros. */
16955 if (x == CONST0_RTX (mode))
16956 return class;
16958 /* Force constants into memory if we are loading a (nonzero) constant into
16959 an MMX or SSE register. This is because there are no MMX/SSE instructions
16960 to load from a constant. */
16961 if (CONSTANT_P (x)
16962 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16963 return NO_REGS;
16965 /* Prefer SSE regs only, if we can use them for math. */
16966 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16967 return SSE_CLASS_P (class) ? class : NO_REGS;
16969 /* Floating-point constants need more complex checks. */
16970 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16972 /* General regs can load everything. */
16973 if (reg_class_subset_p (class, GENERAL_REGS))
16974 return class;
16976 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16977 zero above. We only want to wind up preferring 80387 registers if
16978 we plan on doing computation with them. */
16979 if (TARGET_80387
16980 && standard_80387_constant_p (x))
16982 /* Limit class to non-sse. */
16983 if (class == FLOAT_SSE_REGS)
16984 return FLOAT_REGS;
16985 if (class == FP_TOP_SSE_REGS)
16986 return FP_TOP_REG;
16987 if (class == FP_SECOND_SSE_REGS)
16988 return FP_SECOND_REG;
16989 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16990 return class;
16993 return NO_REGS;
16996 /* Generally when we see PLUS here, it's the function invariant
16997 (plus soft-fp const_int), which can only be computed into general
16998 regs. */
16999 if (GET_CODE (x) == PLUS)
17000 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17002 /* QImode constants are easy to load, but non-constant QImode data
17003 must go into Q_REGS. */
17004 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17006 if (reg_class_subset_p (class, Q_REGS))
17007 return class;
17008 if (reg_class_subset_p (Q_REGS, class))
17009 return Q_REGS;
17010 return NO_REGS;
17013 return class;
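/* Worked example (assuming !TARGET_SSE_MATH): reloading the DFmode
   constant 1.0 into FLOAT_REGS keeps the class, because
   standard_80387_constant_p recognizes it (fld1); requesting the same
   constant in an SSE or MMX class returns NO_REGS above, which forces the
   constant into the constant pool so it is loaded from memory instead.  */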
17016 /* Discourage putting floating-point values in SSE registers unless
17017 SSE math is being used, and likewise for the 387 registers. */
17018 enum reg_class
17019 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17021 enum machine_mode mode = GET_MODE (x);
17023 /* Restrict the output reload class to the register bank that we are doing
17024 math on. If we would like not to return a subset of CLASS, reject this
17025 alternative: if reload cannot do this, it will still use its choice. */
17026 mode = GET_MODE (x);
17027 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17028 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17030 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17032 if (class == FP_TOP_SSE_REGS)
17033 return FP_TOP_REG;
17034 else if (class == FP_SECOND_SSE_REGS)
17035 return FP_SECOND_REG;
17036 else
17037 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17040 return class;
17043 /* If we are copying between general and FP registers, we need a memory
17044 location. The same is true for SSE and MMX registers.
17046 The macro can't work reliably when one of the CLASSES is a class containing
17047 registers from multiple units (SSE, MMX, integer). We avoid this by never
17048 combining those units in single alternative in the machine description.
17049 Ensure that this constraint holds to avoid unexpected surprises.
17051 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17052 enforce these sanity checks. */
17055 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17056 enum machine_mode mode, int strict)
17058 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17059 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17060 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17061 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17062 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17063 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17065 gcc_assert (!strict);
17066 return true;
17069 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17070 return true;
17072 /* ??? This is a lie. We do have moves between mmx/general, and for
17073 mmx/sse2. But by saying we need secondary memory we discourage the
17074 register allocator from using the mmx registers unless needed. */
17075 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17076 return true;
17078 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17080 /* SSE1 doesn't have any direct moves from other classes. */
17081 if (!TARGET_SSE2)
17082 return true;
17084 /* If the target says that inter-unit moves are more expensive
17085 than moving through memory, then don't generate them. */
17086 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17087 return true;
17089 /* Between SSE and general, we have moves no larger than word size. */
17090 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17091 return true;
17093 /* ??? For the cost of one register reformat penalty, we could use
17094 the same instructions to move SFmode and DFmode data, but the
17095 relevant move patterns don't support those alternatives. */
17096 if (mode == SFmode || mode == DFmode)
17097 return true;
17100 return false;
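/* Worked example: a DImode move between GENERAL_REGS and SSE_REGS on a
   32-bit target answers true here, since GET_MODE_SIZE (DImode) exceeds
   UNITS_PER_WORD, so reload goes through a stack slot; the same move on
   x86_64 can be done directly with movd/movq when TARGET_INTER_UNIT_MOVES
   allows it.  */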
17103 /* Return true if the registers in CLASS cannot represent the change from
17104 modes FROM to TO. */
17106 bool
17107 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17108 enum reg_class class)
17110 if (from == to)
17111 return false;
17113 /* x87 registers can't do subreg at all, as all values are reformatted
17114 to extended precision. */
17115 if (MAYBE_FLOAT_CLASS_P (class))
17116 return true;
17118 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17120 /* Vector registers do not support QI or HImode loads. If we don't
17121 disallow a change to these modes, reload will assume it's ok to
17122 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17123 the vec_dupv4hi pattern. */
17124 if (GET_MODE_SIZE (from) < 4)
17125 return true;
17127 /* Vector registers do not support subreg with nonzero offsets, which
17128 are otherwise valid for integer registers. Since we can't see
17129 whether we have a nonzero offset from here, prohibit all
17130 nonparadoxical subregs changing size. */
17131 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17132 return true;
17135 return false;
17138 /* Return the cost of moving data from a register in class CLASS1 to
17139 one in class CLASS2.
17141 It is not required that the cost always equal 2 when FROM is the same as TO;
17142 on some machines it is expensive to move between registers if they are not
17143 general registers. */
17146 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17147 enum reg_class class2)
17149 /* In case we require secondary memory, compute the cost of the store followed
17150 by the load. In order to avoid bad register allocation choices, we need
17151 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17153 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17155 int cost = 1;
17157 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17158 MEMORY_MOVE_COST (mode, class1, 1));
17159 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17160 MEMORY_MOVE_COST (mode, class2, 1));
17162 /* In the case of copying from a general purpose register we may emit multiple
17163 stores followed by a single load, causing a memory size mismatch stall.
17164 Count this as an arbitrarily high cost of 20. */
17165 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17166 cost += 20;
17168 /* In the case of FP/MMX moves, the registers actually overlap, and we
17169 have to switch modes in order to treat them differently. */
17170 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17171 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17172 cost += 20;
17174 return cost;
17177 /* Moves between the SSE/MMX units and the integer unit are expensive. */
17178 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17179 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17180 return ix86_cost->mmxsse_to_integer;
17181 if (MAYBE_FLOAT_CLASS_P (class1))
17182 return ix86_cost->fp_move;
17183 if (MAYBE_SSE_CLASS_P (class1))
17184 return ix86_cost->sse_move;
17185 if (MAYBE_MMX_CLASS_P (class1))
17186 return ix86_cost->mmx_move;
17187 return 2;
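/* Worked example: when secondary memory is needed the cost is built as
   1, plus the larger of the load/store costs for each class, plus 20 when
   the source class needs more hard registers than the destination
   (several stores, one load), plus another 20 for the FP/MMX overlap
   case.  A 32-bit GENERAL_REGS -> SSE_REGS DImode copy therefore costs
   roughly 1 + int memory cost + sse memory cost + 20, far above a plain
   register-register move.  */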
17190 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17192 bool
17193 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17195 /* Flags and only flags can hold CCmode values, and they can hold only CCmode values. */
17196 if (CC_REGNO_P (regno))
17197 return GET_MODE_CLASS (mode) == MODE_CC;
17198 if (GET_MODE_CLASS (mode) == MODE_CC
17199 || GET_MODE_CLASS (mode) == MODE_RANDOM
17200 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17201 return 0;
17202 if (FP_REGNO_P (regno))
17203 return VALID_FP_MODE_P (mode);
17204 if (SSE_REGNO_P (regno))
17206 /* We implement the move patterns for all vector modes into and
17207 out of SSE registers, even when no operation instructions
17208 are available. */
17209 return (VALID_SSE_REG_MODE (mode)
17210 || VALID_SSE2_REG_MODE (mode)
17211 || VALID_MMX_REG_MODE (mode)
17212 || VALID_MMX_REG_MODE_3DNOW (mode));
17214 if (MMX_REGNO_P (regno))
17216 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17217 so if the register is available at all, then we can move data of
17218 the given mode into or out of it. */
17219 return (VALID_MMX_REG_MODE (mode)
17220 || VALID_MMX_REG_MODE_3DNOW (mode));
17223 if (mode == QImode)
17225 /* Take care with QImode values - they can be in non-QI regs,
17226 but then they do cause partial register stalls. */
17227 if (regno < 4 || TARGET_64BIT)
17228 return 1;
17229 if (!TARGET_PARTIAL_REG_STALL)
17230 return 1;
17231 return reload_in_progress || reload_completed;
17233 /* We handle both integers and floats in the general purpose registers. */
17234 else if (VALID_INT_MODE_P (mode))
17235 return 1;
17236 else if (VALID_FP_MODE_P (mode))
17237 return 1;
17238 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17239 on to use that value in smaller contexts, this can easily force a
17240 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17241 supporting DImode, allow it. */
17242 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17243 return 1;
17245 return 0;
17248 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17249 tieable integer mode. */
17251 static bool
17252 ix86_tieable_integer_mode_p (enum machine_mode mode)
17254 switch (mode)
17256 case HImode:
17257 case SImode:
17258 return true;
17260 case QImode:
17261 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17263 case DImode:
17264 return TARGET_64BIT;
17266 default:
17267 return false;
17271 /* Return true if MODE1 is accessible in a register that can hold MODE2
17272 without copying. That is, all register classes that can hold MODE2
17273 can also hold MODE1. */
17275 bool
17276 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17278 if (mode1 == mode2)
17279 return true;
17281 if (ix86_tieable_integer_mode_p (mode1)
17282 && ix86_tieable_integer_mode_p (mode2))
17283 return true;
17285 /* MODE2 being XFmode implies fp stack or general regs, which means we
17286 can tie any smaller floating point modes to it. Note that we do not
17287 tie this with TFmode. */
17288 if (mode2 == XFmode)
17289 return mode1 == SFmode || mode1 == DFmode;
17291 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17292 that we can tie it with SFmode. */
17293 if (mode2 == DFmode)
17294 return mode1 == SFmode;
17296 /* If MODE2 is only appropriate for an SSE register, then tie with
17297 any other mode acceptable to SSE registers. */
17298 if (GET_MODE_SIZE (mode2) >= 8
17299 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17300 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17302 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17303 with any other mode acceptable to MMX registers. */
17304 if (GET_MODE_SIZE (mode2) == 8
17305 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17306 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17308 return false;
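/* Worked examples: ix86_modes_tieable_p (SFmode, XFmode) and
   (SFmode, DFmode) are true by the explicit rules above, while
   (QImode, SImode) is true only when QImode is itself a tieable integer
   mode, i.e. on 64-bit targets or when TARGET_PARTIAL_REG_STALL is not
   set.  */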
17311 /* Return the cost of moving data of mode M between a
17312 register and memory. A value of 2 is the default; this cost is
17313 relative to those in `REGISTER_MOVE_COST'.
17315 If moving between registers and memory is more expensive than
17316 between two registers, you should define this macro to express the
17317 relative cost.
17319 Also model the increased cost of moving QImode registers in non-Q_REGS
17320 classes. */
17323 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17325 if (FLOAT_CLASS_P (class))
17327 int index;
17328 switch (mode)
17330 case SFmode:
17331 index = 0;
17332 break;
17333 case DFmode:
17334 index = 1;
17335 break;
17336 case XFmode:
17337 index = 2;
17338 break;
17339 default:
17340 return 100;
17342 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17344 if (SSE_CLASS_P (class))
17346 int index;
17347 switch (GET_MODE_SIZE (mode))
17349 case 4:
17350 index = 0;
17351 break;
17352 case 8:
17353 index = 1;
17354 break;
17355 case 16:
17356 index = 2;
17357 break;
17358 default:
17359 return 100;
17361 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17363 if (MMX_CLASS_P (class))
17365 int index;
17366 switch (GET_MODE_SIZE (mode))
17368 case 4:
17369 index = 0;
17370 break;
17371 case 8:
17372 index = 1;
17373 break;
17374 default:
17375 return 100;
17377 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17379 switch (GET_MODE_SIZE (mode))
17381 case 1:
17382 if (in)
17383 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17384 : ix86_cost->movzbl_load);
17385 else
17386 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17387 : ix86_cost->int_store[0] + 4);
17388 break;
17389 case 2:
17390 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17391 default:
17392 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17393 if (mode == TFmode)
17394 mode = XFmode;
17395 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17396 * (((int) GET_MODE_SIZE (mode)
17397 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
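/* Worked example: for integer classes the default case charges one 32-bit
   move per word, so a DImode load into GENERAL_REGS on a 32-bit target
   costs ix86_cost->int_load[2] * 2, and a QImode store into a non-Q_REGS
   class pays the extra + 4 above because it cannot use the byte
   registers directly.  */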
17401 /* Compute a (partial) cost for rtx X. Return true if the complete
17402 cost has been computed, and false if subexpressions should be
17403 scanned. In either case, *TOTAL contains the cost result. */
17405 static bool
17406 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17408 enum machine_mode mode = GET_MODE (x);
17410 switch (code)
17412 case CONST_INT:
17413 case CONST:
17414 case LABEL_REF:
17415 case SYMBOL_REF:
17416 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17417 *total = 3;
17418 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17419 *total = 2;
17420 else if (flag_pic && SYMBOLIC_CONST (x)
17421 && (!TARGET_64BIT
17422 || (GET_CODE (x) != LABEL_REF
17423 && (GET_CODE (x) != SYMBOL_REF
17424 || !SYMBOL_REF_LOCAL_P (x)))))
17425 *total = 1;
17426 else
17427 *total = 0;
17428 return true;
17430 case CONST_DOUBLE:
17431 if (mode == VOIDmode)
17432 *total = 0;
17433 else
17434 switch (standard_80387_constant_p (x))
17436 case 1: /* 0.0 */
17437 *total = 1;
17438 break;
17439 default: /* Other constants */
17440 *total = 2;
17441 break;
17442 case 0:
17443 case -1:
17444 /* Start with (MEM (SYMBOL_REF)), since that's where
17445 it'll probably end up. Add a penalty for size. */
17446 *total = (COSTS_N_INSNS (1)
17447 + (flag_pic != 0 && !TARGET_64BIT)
17448 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17449 break;
17451 return true;
17453 case ZERO_EXTEND:
17454 /* The zero extension is often completely free on x86_64, so make
17455 it as cheap as possible. */
17456 if (TARGET_64BIT && mode == DImode
17457 && GET_MODE (XEXP (x, 0)) == SImode)
17458 *total = 1;
17459 else if (TARGET_ZERO_EXTEND_WITH_AND)
17460 *total = ix86_cost->add;
17461 else
17462 *total = ix86_cost->movzx;
17463 return false;
17465 case SIGN_EXTEND:
17466 *total = ix86_cost->movsx;
17467 return false;
17469 case ASHIFT:
17470 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17471 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17473 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17474 if (value == 1)
17476 *total = ix86_cost->add;
17477 return false;
17479 if ((value == 2 || value == 3)
17480 && ix86_cost->lea <= ix86_cost->shift_const)
17482 *total = ix86_cost->lea;
17483 return false;
17486 /* FALLTHRU */
17488 case ROTATE:
17489 case ASHIFTRT:
17490 case LSHIFTRT:
17491 case ROTATERT:
17492 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17494 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17496 if (INTVAL (XEXP (x, 1)) > 32)
17497 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17498 else
17499 *total = ix86_cost->shift_const * 2;
17501 else
17503 if (GET_CODE (XEXP (x, 1)) == AND)
17504 *total = ix86_cost->shift_var * 2;
17505 else
17506 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17509 else
17511 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17512 *total = ix86_cost->shift_const;
17513 else
17514 *total = ix86_cost->shift_var;
17516 return false;
17518 case MULT:
17519 if (FLOAT_MODE_P (mode))
17521 *total = ix86_cost->fmul;
17522 return false;
17524 else
17526 rtx op0 = XEXP (x, 0);
17527 rtx op1 = XEXP (x, 1);
17528 int nbits;
17529 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17531 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17532 for (nbits = 0; value != 0; value &= value - 1)
17533 nbits++;
17535 else
17536 /* This is arbitrary. */
17537 nbits = 7;
17539 /* Compute costs correctly for widening multiplication. */
17540 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17541 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17542 == GET_MODE_SIZE (mode))
17544 int is_mulwiden = 0;
17545 enum machine_mode inner_mode = GET_MODE (op0);
17547 if (GET_CODE (op0) == GET_CODE (op1))
17548 is_mulwiden = 1, op1 = XEXP (op1, 0);
17549 else if (GET_CODE (op1) == CONST_INT)
17551 if (GET_CODE (op0) == SIGN_EXTEND)
17552 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17553 == INTVAL (op1);
17554 else
17555 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17558 if (is_mulwiden)
17559 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17562 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17563 + nbits * ix86_cost->mult_bit
17564 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17566 return true;
17569 case DIV:
17570 case UDIV:
17571 case MOD:
17572 case UMOD:
17573 if (FLOAT_MODE_P (mode))
17574 *total = ix86_cost->fdiv;
17575 else
17576 *total = ix86_cost->divide[MODE_INDEX (mode)];
17577 return false;
17579 case PLUS:
17580 if (FLOAT_MODE_P (mode))
17581 *total = ix86_cost->fadd;
17582 else if (GET_MODE_CLASS (mode) == MODE_INT
17583 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17585 if (GET_CODE (XEXP (x, 0)) == PLUS
17586 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17587 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17588 && CONSTANT_P (XEXP (x, 1)))
17590 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17591 if (val == 2 || val == 4 || val == 8)
17593 *total = ix86_cost->lea;
17594 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17595 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17596 outer_code);
17597 *total += rtx_cost (XEXP (x, 1), outer_code);
17598 return true;
17601 else if (GET_CODE (XEXP (x, 0)) == MULT
17602 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17604 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17605 if (val == 2 || val == 4 || val == 8)
17607 *total = ix86_cost->lea;
17608 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17609 *total += rtx_cost (XEXP (x, 1), outer_code);
17610 return true;
17613 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17615 *total = ix86_cost->lea;
17616 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17617 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17618 *total += rtx_cost (XEXP (x, 1), outer_code);
17619 return true;
17622 /* FALLTHRU */
17624 case MINUS:
17625 if (FLOAT_MODE_P (mode))
17627 *total = ix86_cost->fadd;
17628 return false;
17630 /* FALLTHRU */
17632 case AND:
17633 case IOR:
17634 case XOR:
17635 if (!TARGET_64BIT && mode == DImode)
17637 *total = (ix86_cost->add * 2
17638 + (rtx_cost (XEXP (x, 0), outer_code)
17639 << (GET_MODE (XEXP (x, 0)) != DImode))
17640 + (rtx_cost (XEXP (x, 1), outer_code)
17641 << (GET_MODE (XEXP (x, 1)) != DImode)));
17642 return true;
17644 /* FALLTHRU */
17646 case NEG:
17647 if (FLOAT_MODE_P (mode))
17649 *total = ix86_cost->fchs;
17650 return false;
17652 /* FALLTHRU */
17654 case NOT:
17655 if (!TARGET_64BIT && mode == DImode)
17656 *total = ix86_cost->add * 2;
17657 else
17658 *total = ix86_cost->add;
17659 return false;
17661 case COMPARE:
17662 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17663 && XEXP (XEXP (x, 0), 1) == const1_rtx
17664 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17665 && XEXP (x, 1) == const0_rtx)
17667 /* This kind of construct is implemented using test[bwl].
17668 Treat it as if we had an AND. */
17669 *total = (ix86_cost->add
17670 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17671 + rtx_cost (const1_rtx, outer_code));
17672 return true;
17674 return false;
17676 case FLOAT_EXTEND:
17677 if (!TARGET_SSE_MATH
17678 || mode == XFmode
17679 || (mode == DFmode && !TARGET_SSE2))
17680 /* For standard 80387 constants, raise the cost to prevent
17681 compress_float_constant() from generating a load from memory. */
17682 switch (standard_80387_constant_p (XEXP (x, 0)))
17684 case -1:
17685 case 0:
17686 *total = 0;
17687 break;
17688 case 1: /* 0.0 */
17689 *total = 1;
17690 break;
17691 default:
17692 *total = (x86_ext_80387_constants & TUNEMASK
17693 || optimize_size
17694 ? 1 : 0);
17696 return false;
17698 case ABS:
17699 if (FLOAT_MODE_P (mode))
17700 *total = ix86_cost->fabs;
17701 return false;
17703 case SQRT:
17704 if (FLOAT_MODE_P (mode))
17705 *total = ix86_cost->fsqrt;
17706 return false;
17708 case UNSPEC:
17709 if (XINT (x, 1) == UNSPEC_TP)
17710 *total = 0;
17711 return false;
17713 default:
17714 return false;
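/* Worked example: the PLUS case above is what makes address-style
   arithmetic cheap; an expression such as

       (plus:SI (mult:SI (reg) (const_int 4)) (reg))

   is costed as a single ix86_cost->lea plus the costs of its register
   operands rather than as a shift and an add, while scales other than
   2, 4 or 8 fall through to the generic add costing.  */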
17718 #if TARGET_MACHO
17720 static int current_machopic_label_num;
17722 /* Given a symbol name and its associated stub, write out the
17723 definition of the stub. */
17725 void
17726 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17728 unsigned int length;
17729 char *binder_name, *symbol_name, lazy_ptr_name[32];
17730 int label = ++current_machopic_label_num;
17732 /* For 64-bit we shouldn't get here. */
17733 gcc_assert (!TARGET_64BIT);
17735 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17736 symb = (*targetm.strip_name_encoding) (symb);
17738 length = strlen (stub);
17739 binder_name = alloca (length + 32);
17740 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17742 length = strlen (symb);
17743 symbol_name = alloca (length + 32);
17744 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17746 sprintf (lazy_ptr_name, "L%d$lz", label);
17748 if (MACHOPIC_PURE)
17749 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17750 else
17751 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17753 fprintf (file, "%s:\n", stub);
17754 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17756 if (MACHOPIC_PURE)
17758 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17759 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17760 fprintf (file, "\tjmp\t*%%edx\n");
17762 else
17763 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17765 fprintf (file, "%s:\n", binder_name);
17767 if (MACHOPIC_PURE)
17769 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17770 fprintf (file, "\tpushl\t%%eax\n");
17772 else
17773 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17775 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17777 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17778 fprintf (file, "%s:\n", lazy_ptr_name);
17779 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17780 fprintf (file, "\t.long %s\n", binder_name);
17783 void
17784 darwin_x86_file_end (void)
17786 darwin_file_end ();
17787 ix86_file_end ();
17789 #endif /* TARGET_MACHO */
17791 /* Order the registers for the register allocator. */
17793 void
17794 x86_order_regs_for_local_alloc (void)
17796 int pos = 0;
17797 int i;
17799 /* First allocate the local general purpose registers. */
17800 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17801 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17802 reg_alloc_order [pos++] = i;
17804 /* Global general purpose registers. */
17805 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17806 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17807 reg_alloc_order [pos++] = i;
17809 /* x87 registers come first in case we are doing FP math
17810 using them. */
17811 if (!TARGET_SSE_MATH)
17812 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17813 reg_alloc_order [pos++] = i;
17815 /* SSE registers. */
17816 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17817 reg_alloc_order [pos++] = i;
17818 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17819 reg_alloc_order [pos++] = i;
17821 /* x87 registers. */
17822 if (TARGET_SSE_MATH)
17823 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17824 reg_alloc_order [pos++] = i;
17826 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17827 reg_alloc_order [pos++] = i;
17829 /* Initialize the rest of the array, as we do not allocate some registers
17830 at all. */
17831 while (pos < FIRST_PSEUDO_REGISTER)
17832 reg_alloc_order [pos++] = 0;
17835 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17836 struct attribute_spec.handler. */
17837 static tree
17838 ix86_handle_struct_attribute (tree *node, tree name,
17839 tree args ATTRIBUTE_UNUSED,
17840 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17842 tree *type = NULL;
17843 if (DECL_P (*node))
17845 if (TREE_CODE (*node) == TYPE_DECL)
17846 type = &TREE_TYPE (*node);
17848 else
17849 type = node;
17851 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17852 || TREE_CODE (*type) == UNION_TYPE)))
17854 warning (OPT_Wattributes, "%qs attribute ignored",
17855 IDENTIFIER_POINTER (name));
17856 *no_add_attrs = true;
17859 else if ((is_attribute_p ("ms_struct", name)
17860 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17861 || ((is_attribute_p ("gcc_struct", name)
17862 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17864 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17865 IDENTIFIER_POINTER (name));
17866 *no_add_attrs = true;
17869 return NULL_TREE;
17872 static bool
17873 ix86_ms_bitfield_layout_p (tree record_type)
17875 return (TARGET_MS_BITFIELD_LAYOUT &&
17876 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17877 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17880 /* Returns an expression indicating where the this parameter is
17881 located on entry to the FUNCTION. */
17883 static rtx
17884 x86_this_parameter (tree function)
17886 tree type = TREE_TYPE (function);
17888 if (TARGET_64BIT)
17890 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17891 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17894 if (ix86_function_regparm (type, function) > 0)
17896 tree parm;
17898 parm = TYPE_ARG_TYPES (type);
17899 /* Figure out whether or not the function has a variable number of
17900 arguments. */
17901 for (; parm; parm = TREE_CHAIN (parm))
17902 if (TREE_VALUE (parm) == void_type_node)
17903 break;
17904 /* If not, the this parameter is in the first argument. */
17905 if (parm)
17907 int regno = 0;
17908 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17909 regno = 2;
17910 return gen_rtx_REG (SImode, regno);
17914 if (aggregate_value_p (TREE_TYPE (type), type))
17915 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17916 else
17917 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17920 /* Determine whether x86_output_mi_thunk can succeed. */
17922 static bool
17923 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17924 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17925 HOST_WIDE_INT vcall_offset, tree function)
17927 /* 64-bit can handle anything. */
17928 if (TARGET_64BIT)
17929 return true;
17931 /* For 32-bit, everything's fine if we have one free register. */
17932 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17933 return true;
17935 /* Need a free register for vcall_offset. */
17936 if (vcall_offset)
17937 return false;
17939 /* Need a free register for GOT references. */
17940 if (flag_pic && !(*targetm.binds_local_p) (function))
17941 return false;
17943 /* Otherwise ok. */
17944 return true;
17947 /* Output the assembler code for a thunk function. THUNK_DECL is the
17948 declaration for the thunk function itself, FUNCTION is the decl for
17949 the target function. DELTA is an immediate constant offset to be
17950 added to THIS. If VCALL_OFFSET is nonzero, the word at
17951 *(*this + vcall_offset) should be added to THIS. */
17953 static void
17954 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17955 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17956 HOST_WIDE_INT vcall_offset, tree function)
17958 rtx xops[3];
17959 rtx this = x86_this_parameter (function);
17960 rtx this_reg, tmp;
17962 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17963 pull it in now and let DELTA benefit. */
17964 if (REG_P (this))
17965 this_reg = this;
17966 else if (vcall_offset)
17968 /* Put the this parameter into %eax. */
17969 xops[0] = this;
17970 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17971 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17973 else
17974 this_reg = NULL_RTX;
17976 /* Adjust the this parameter by a fixed constant. */
17977 if (delta)
17979 xops[0] = GEN_INT (delta);
17980 xops[1] = this_reg ? this_reg : this;
17981 if (TARGET_64BIT)
17983 if (!x86_64_general_operand (xops[0], DImode))
17985 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17986 xops[1] = tmp;
17987 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17988 xops[0] = tmp;
17989 xops[1] = this;
17991 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17993 else
17994 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17997 /* Adjust the this parameter by a value stored in the vtable. */
17998 if (vcall_offset)
18000 if (TARGET_64BIT)
18001 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18002 else
18004 int tmp_regno = 2 /* ECX */;
18005 if (lookup_attribute ("fastcall",
18006 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18007 tmp_regno = 0 /* EAX */;
18008 tmp = gen_rtx_REG (SImode, tmp_regno);
18011 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18012 xops[1] = tmp;
18013 if (TARGET_64BIT)
18014 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18015 else
18016 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18018 /* Adjust the this parameter. */
18019 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18020 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18022 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18023 xops[0] = GEN_INT (vcall_offset);
18024 xops[1] = tmp2;
18025 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18026 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18028 xops[1] = this_reg;
18029 if (TARGET_64BIT)
18030 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18031 else
18032 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18035 /* If necessary, drop THIS back to its stack slot. */
18036 if (this_reg && this_reg != this)
18038 xops[0] = this_reg;
18039 xops[1] = this;
18040 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18043 xops[0] = XEXP (DECL_RTL (function), 0);
18044 if (TARGET_64BIT)
18046 if (!flag_pic || (*targetm.binds_local_p) (function))
18047 output_asm_insn ("jmp\t%P0", xops);
18048 else
18050 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18051 tmp = gen_rtx_CONST (Pmode, tmp);
18052 tmp = gen_rtx_MEM (QImode, tmp);
18053 xops[0] = tmp;
18054 output_asm_insn ("jmp\t%A0", xops);
18057 else
18059 if (!flag_pic || (*targetm.binds_local_p) (function))
18060 output_asm_insn ("jmp\t%P0", xops);
18061 else
18062 #if TARGET_MACHO
18063 if (TARGET_MACHO)
18065 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18066 tmp = (gen_rtx_SYMBOL_REF
18067 (Pmode,
18068 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18069 tmp = gen_rtx_MEM (QImode, tmp);
18070 xops[0] = tmp;
18071 output_asm_insn ("jmp\t%0", xops);
18073 else
18074 #endif /* TARGET_MACHO */
18076 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18077 output_set_got (tmp, NULL_RTX);
18079 xops[1] = tmp;
18080 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18081 output_asm_insn ("jmp\t{*}%1", xops);
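/* For illustration: in the simplest 32-bit case (stack-passed this
   pointer, constant DELTA, no VCALL_OFFSET) the thunk emitted above is
   just

       addl $DELTA, 4(%esp)
       jmp  target_function

   while the vcall case loads *this into a scratch register, adds the word
   at *(*this + vcall_offset) to THIS, writes it back if needed, and then
   jumps.  */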
18086 static void
18087 x86_file_start (void)
18089 default_file_start ();
18090 #if TARGET_MACHO
18091 darwin_file_start ();
18092 #endif
18093 if (X86_FILE_START_VERSION_DIRECTIVE)
18094 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18095 if (X86_FILE_START_FLTUSED)
18096 fputs ("\t.global\t__fltused\n", asm_out_file);
18097 if (ix86_asm_dialect == ASM_INTEL)
18098 fputs ("\t.intel_syntax\n", asm_out_file);
18102 x86_field_alignment (tree field, int computed)
18104 enum machine_mode mode;
18105 tree type = TREE_TYPE (field);
18107 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18108 return computed;
18109 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18110 ? get_inner_array_type (type) : type);
18111 if (mode == DFmode || mode == DCmode
18112 || GET_MODE_CLASS (mode) == MODE_INT
18113 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18114 return MIN (32, computed);
18115 return computed;
18118 /* Output assembler code to FILE to increment profiler label # LABELNO
18119 for profiling a function entry. */
18120 void
18121 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18123 if (TARGET_64BIT)
18124 if (flag_pic)
18126 #ifndef NO_PROFILE_COUNTERS
18127 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18128 #endif
18129 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18131 else
18133 #ifndef NO_PROFILE_COUNTERS
18134 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18135 #endif
18136 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18138 else if (flag_pic)
18140 #ifndef NO_PROFILE_COUNTERS
18141 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18142 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18143 #endif
18144 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18146 else
18148 #ifndef NO_PROFILE_COUNTERS
18149 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18150 PROFILE_COUNT_REGISTER);
18151 #endif
18152 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18156 /* We don't have exact information about the insn sizes, but we may assume
18157 quite safely that we are informed about all 1-byte insns and memory
18158 address sizes.  This is enough to eliminate unnecessary padding in
18159 99% of cases.  */
18161 static int
18162 min_insn_size (rtx insn)
18164 int l = 0;
18166 if (!INSN_P (insn) || !active_insn_p (insn))
18167 return 0;
18169 /* Discard alignments we've emitted, and jump instructions.  */
18170 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18171 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18172 return 0;
18173 if (GET_CODE (insn) == JUMP_INSN
18174 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18175 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18176 return 0;
18178 /* Important case - calls are always 5 bytes.
18179 It is common to have many calls in a row.  */
18180 if (GET_CODE (insn) == CALL_INSN
18181 && symbolic_reference_mentioned_p (PATTERN (insn))
18182 && !SIBLING_CALL_P (insn))
18183 return 5;
18184 if (get_attr_length (insn) <= 1)
18185 return 1;
18187 /* For normal instructions we may rely on the sizes of addresses
18188 and the presence of a symbol to require 4 bytes of encoding.  This is
18189 not the case for jumps, where references are PC relative.  */
18190 if (GET_CODE (insn) != JUMP_INSN)
18192 l = get_attr_length_address (insn);
18193 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18194 l = 4;
18196 if (l)
18197 return 1+l;
18198 else
18199 return 2;
18202 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
18203 16-byte window.  */
18205 static void
18206 ix86_avoid_jump_misspredicts (void)
18208 rtx insn, start = get_insns ();
18209 int nbytes = 0, njumps = 0;
18210 int isjump = 0;
18212 /* Look for all minimal intervals of instructions containing 4 jumps.
18213 The intervals are bounded by START and INSN.  NBYTES is the total
18214 size of the instructions in the interval, including INSN and not
18215 including START.  When NBYTES is smaller than 16, it is possible
18216 that the end of START and the end of INSN land in the same 16-byte window.
18218 The smallest offset in the window at which INSN can start is the case where
18219 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
18220 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).  */
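/* For instance (illustrative numbers): if NBYTES is 12 and INSN itself is
   2 bytes long, the loop below emits an align of 15 - 12 + 2 = 5 bytes in
   front of INSN, pushing it out of the current 16-byte window.  */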
18222 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18225 nbytes += min_insn_size (insn);
18226 if (dump_file)
18227 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18228 INSN_UID (insn), min_insn_size (insn));
18229 if ((GET_CODE (insn) == JUMP_INSN
18230 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18231 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18232 || GET_CODE (insn) == CALL_INSN)
18233 njumps++;
18234 else
18235 continue;
18237 while (njumps > 3)
18239 start = NEXT_INSN (start);
18240 if ((GET_CODE (start) == JUMP_INSN
18241 && GET_CODE (PATTERN (start)) != ADDR_VEC
18242 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18243 || GET_CODE (start) == CALL_INSN)
18244 njumps--, isjump = 1;
18245 else
18246 isjump = 0;
18247 nbytes -= min_insn_size (start);
18249 gcc_assert (njumps >= 0);
18250 if (dump_file)
18251 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18252 INSN_UID (start), INSN_UID (insn), nbytes);
18254 if (njumps == 3 && isjump && nbytes < 16)
18256 int padsize = 15 - nbytes + min_insn_size (insn);
18258 if (dump_file)
18259 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18260 INSN_UID (insn), padsize);
18261 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18266 /* AMD Athlon works faster
18267 when RET is not the destination of a conditional jump or directly preceded
18268 by another jump instruction.  We avoid the penalty by inserting a NOP just
18269 before the RET instruction in such cases.  */
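/* (The fix emitted below replaces the plain RET with the
   return_internal_long pattern, which uses a longer encoding of the
   return and so has the same effect as the NOP described above.)  */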
18270 static void
18271 ix86_pad_returns (void)
18273 edge e;
18274 edge_iterator ei;
18276 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18278 basic_block bb = e->src;
18279 rtx ret = BB_END (bb);
18280 rtx prev;
18281 bool replace = false;
18283 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18284 || !maybe_hot_bb_p (bb))
18285 continue;
18286 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18287 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18288 break;
18289 if (prev && GET_CODE (prev) == CODE_LABEL)
18291 edge e;
18292 edge_iterator ei;
18294 FOR_EACH_EDGE (e, ei, bb->preds)
18295 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18296 && !(e->flags & EDGE_FALLTHRU))
18297 replace = true;
18299 if (!replace)
18301 prev = prev_active_insn (ret);
18302 if (prev
18303 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18304 || GET_CODE (prev) == CALL_INSN))
18305 replace = true;
18306 /* Empty functions get a branch mispredict even when the jump destination
18307 is not visible to us.  */
18308 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18309 replace = true;
18311 if (replace)
18313 emit_insn_before (gen_return_internal_long (), ret);
18314 delete_insn (ret);
18319 /* Implement machine-specific optimizations.  We implement padding of returns
18320 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
18321 static void
18322 ix86_reorg (void)
18324 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18325 ix86_pad_returns ();
18326 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18327 ix86_avoid_jump_misspredicts ();
18330 /* Return nonzero when a QImode register that must be represented via a REX
18331 prefix is used.  */
18332 bool
18333 x86_extended_QIreg_mentioned_p (rtx insn)
18335 int i;
18336 extract_insn_cached (insn);
18337 for (i = 0; i < recog_data.n_operands; i++)
18338 if (REG_P (recog_data.operand[i])
18339 && REGNO (recog_data.operand[i]) >= 4)
18340 return true;
18341 return false;
18344 /* Return nonzero when P points to a register encoded via a REX prefix.
18345 Called via for_each_rtx.  */
18346 static int
18347 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18349 unsigned int regno;
18350 if (!REG_P (*p))
18351 return 0;
18352 regno = REGNO (*p);
18353 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18356 /* Return true when INSN mentions a register that must be encoded using a
18357 REX prefix.  */
18358 bool
18359 x86_extended_reg_mentioned_p (rtx insn)
18361 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18364 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18365 optabs would emit if we didn't have TFmode patterns. */
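/* For inputs with the sign bit set, the value is halved as
   (in >> 1) | (in & 1); ORing the low bit back in keeps rounding correct
   when the converted result is doubled.  E.g. for the SImode input
   0x80000001 the expansion computes 2.0 * (FP) 0x40000001.  */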
18367 void
18368 x86_emit_floatuns (rtx operands[2])
18370 rtx neglab, donelab, i0, i1, f0, in, out;
18371 enum machine_mode mode, inmode;
18373 inmode = GET_MODE (operands[1]);
18374 gcc_assert (inmode == SImode || inmode == DImode);
18376 out = operands[0];
18377 in = force_reg (inmode, operands[1]);
18378 mode = GET_MODE (out);
18379 neglab = gen_label_rtx ();
18380 donelab = gen_label_rtx ();
18381 i1 = gen_reg_rtx (Pmode);
18382 f0 = gen_reg_rtx (mode);
18384 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18386 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18387 emit_jump_insn (gen_jump (donelab));
18388 emit_barrier ();
18390 emit_label (neglab);
18392 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18393 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18394 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18395 expand_float (f0, i0, 0);
18396 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18398 emit_label (donelab);
18401 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18402 with all elements equal to VAR. Return true if successful. */
18404 static bool
18405 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18406 rtx target, rtx val)
18408 enum machine_mode smode, wsmode, wvmode;
18409 rtx x;
18411 switch (mode)
18413 case V2SImode:
18414 case V2SFmode:
18415 if (!mmx_ok)
18416 return false;
18417 /* FALLTHRU */
18419 case V2DFmode:
18420 case V2DImode:
18421 case V4SFmode:
18422 case V4SImode:
18423 val = force_reg (GET_MODE_INNER (mode), val);
18424 x = gen_rtx_VEC_DUPLICATE (mode, val);
18425 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18426 return true;
18428 case V4HImode:
18429 if (!mmx_ok)
18430 return false;
18431 if (TARGET_SSE || TARGET_3DNOW_A)
18433 val = gen_lowpart (SImode, val);
18434 x = gen_rtx_TRUNCATE (HImode, val);
18435 x = gen_rtx_VEC_DUPLICATE (mode, x);
18436 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18437 return true;
18439 else
18441 smode = HImode;
18442 wsmode = SImode;
18443 wvmode = V2SImode;
18444 goto widen;
18447 case V8QImode:
18448 if (!mmx_ok)
18449 return false;
18450 smode = QImode;
18451 wsmode = HImode;
18452 wvmode = V4HImode;
18453 goto widen;
18454 case V8HImode:
18455 if (TARGET_SSE2)
18457 rtx tmp1, tmp2;
18458 /* Extend HImode to SImode using a paradoxical SUBREG. */
18459 tmp1 = gen_reg_rtx (SImode);
18460 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18461 /* Insert the SImode value as low element of V4SImode vector. */
18462 tmp2 = gen_reg_rtx (V4SImode);
18463 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18464 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18465 CONST0_RTX (V4SImode),
18466 const1_rtx);
18467 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18468 /* Cast the V4SImode vector back to a V8HImode vector. */
18469 tmp1 = gen_reg_rtx (V8HImode);
18470 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18471 /* Duplicate the low short through the whole low SImode word. */
18472 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18473 /* Cast the V8HImode vector back to a V4SImode vector. */
18474 tmp2 = gen_reg_rtx (V4SImode);
18475 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18476 /* Replicate the low element of the V4SImode vector. */
18477 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18478 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18479 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18480 return true;
18482 smode = HImode;
18483 wsmode = SImode;
18484 wvmode = V4SImode;
18485 goto widen;
18486 case V16QImode:
18487 if (TARGET_SSE2)
18489 rtx tmp1, tmp2;
18490 /* Extend QImode to SImode using a paradoxical SUBREG. */
18491 tmp1 = gen_reg_rtx (SImode);
18492 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18493 /* Insert the SImode value as low element of V4SImode vector. */
18494 tmp2 = gen_reg_rtx (V4SImode);
18495 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18496 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18497 CONST0_RTX (V4SImode),
18498 const1_rtx);
18499 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18500 /* Cast the V4SImode vector back to a V16QImode vector. */
18501 tmp1 = gen_reg_rtx (V16QImode);
18502 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18503 /* Duplicate the low byte through the whole low SImode word. */
18504 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18505 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18506 /* Cast the V16QImode vector back to a V4SImode vector. */
18507 tmp2 = gen_reg_rtx (V4SImode);
18508 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18509 /* Replicate the low element of the V4SImode vector. */
18510 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18511 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18512 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18513 return true;
18515 smode = QImode;
18516 wsmode = HImode;
18517 wvmode = V8HImode;
18518 goto widen;
18519 widen:
18520 /* Replicate the value once into the next wider mode and recurse. */
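/* E.g. broadcasting a QImode byte B into V8QImode without SSE first forms
   the HImode value (B << 8) | B, broadcasts that into V4HImode, and
   reinterprets the result as V8QImode.  */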
18521 val = convert_modes (wsmode, smode, val, true);
18522 x = expand_simple_binop (wsmode, ASHIFT, val,
18523 GEN_INT (GET_MODE_BITSIZE (smode)),
18524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18525 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18527 x = gen_reg_rtx (wvmode);
18528 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18529 gcc_unreachable ();
18530 emit_move_insn (target, gen_lowpart (mode, x));
18531 return true;
18533 default:
18534 return false;
18538 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18539 whose ONE_VAR element is VAR, and other elements are zero. Return true
18540 if successful. */
18542 static bool
18543 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18544 rtx target, rtx var, int one_var)
18546 enum machine_mode vsimode;
18547 rtx new_target;
18548 rtx x, tmp;
18550 switch (mode)
18552 case V2SFmode:
18553 case V2SImode:
18554 if (!mmx_ok)
18555 return false;
18556 /* FALLTHRU */
18558 case V2DFmode:
18559 case V2DImode:
18560 if (one_var != 0)
18561 return false;
18562 var = force_reg (GET_MODE_INNER (mode), var);
18563 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18564 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18565 return true;
18567 case V4SFmode:
18568 case V4SImode:
18569 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18570 new_target = gen_reg_rtx (mode);
18571 else
18572 new_target = target;
18573 var = force_reg (GET_MODE_INNER (mode), var);
18574 x = gen_rtx_VEC_DUPLICATE (mode, var);
18575 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18576 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18577 if (one_var != 0)
18579 /* We need to shuffle the value to the correct position, so
18580 create a new pseudo to store the intermediate result. */
18582 /* With SSE2, we can use the integer shuffle insns. */
18583 if (mode != V4SFmode && TARGET_SSE2)
18585 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18586 GEN_INT (1),
18587 GEN_INT (one_var == 1 ? 0 : 1),
18588 GEN_INT (one_var == 2 ? 0 : 1),
18589 GEN_INT (one_var == 3 ? 0 : 1)));
18590 if (target != new_target)
18591 emit_move_insn (target, new_target);
18592 return true;
18595 /* Otherwise convert the intermediate result to V4SFmode and
18596 use the SSE1 shuffle instructions. */
18597 if (mode != V4SFmode)
18599 tmp = gen_reg_rtx (V4SFmode);
18600 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18602 else
18603 tmp = new_target;
18605 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18606 GEN_INT (1),
18607 GEN_INT (one_var == 1 ? 0 : 1),
18608 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18609 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18611 if (mode != V4SFmode)
18612 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18613 else if (tmp != target)
18614 emit_move_insn (target, tmp);
18616 else if (target != new_target)
18617 emit_move_insn (target, new_target);
18618 return true;
18620 case V8HImode:
18621 case V16QImode:
18622 vsimode = V4SImode;
18623 goto widen;
18624 case V4HImode:
18625 case V8QImode:
18626 if (!mmx_ok)
18627 return false;
18628 vsimode = V2SImode;
18629 goto widen;
18630 widen:
18631 if (one_var != 0)
18632 return false;
18634 /* Zero extend the variable element to SImode and recurse. */
18635 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18637 x = gen_reg_rtx (vsimode);
18638 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18639 var, one_var))
18640 gcc_unreachable ();
18642 emit_move_insn (target, gen_lowpart (mode, x));
18643 return true;
18645 default:
18646 return false;
18650 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18651 consisting of the values in VALS. It is known that all elements
18652 except ONE_VAR are constants. Return true if successful. */
18654 static bool
18655 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18656 rtx target, rtx vals, int one_var)
18658 rtx var = XVECEXP (vals, 0, one_var);
18659 enum machine_mode wmode;
18660 rtx const_vec, x;
18662 const_vec = copy_rtx (vals);
18663 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18664 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18666 switch (mode)
18668 case V2DFmode:
18669 case V2DImode:
18670 case V2SFmode:
18671 case V2SImode:
18672 /* For the two element vectors, it's just as easy to use
18673 the general case. */
18674 return false;
18676 case V4SFmode:
18677 case V4SImode:
18678 case V8HImode:
18679 case V4HImode:
18680 break;
18682 case V16QImode:
18683 wmode = V8HImode;
18684 goto widen;
18685 case V8QImode:
18686 wmode = V4HImode;
18687 goto widen;
18688 widen:
18689 /* There's no way to set one QImode entry easily. Combine
18690 the variable value with its adjacent constant value, and
18691 promote to an HImode set. */
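/* For instance, in a V16QImode vector whose element 5 is the variable one,
   the variable byte is shifted into the high half of an HImode word, the
   constant byte from element 4 fills the low half, and the combined word
   becomes element 2 of the corresponding V8HImode vector.  */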
18692 x = XVECEXP (vals, 0, one_var ^ 1);
18693 if (one_var & 1)
18695 var = convert_modes (HImode, QImode, var, true);
18696 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18697 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18698 x = GEN_INT (INTVAL (x) & 0xff);
18700 else
18702 var = convert_modes (HImode, QImode, var, true);
18703 x = gen_int_mode (INTVAL (x) << 8, HImode);
18705 if (x != const0_rtx)
18706 var = expand_simple_binop (HImode, IOR, var, x, var,
18707 1, OPTAB_LIB_WIDEN);
18709 x = gen_reg_rtx (wmode);
18710 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18711 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18713 emit_move_insn (target, gen_lowpart (mode, x));
18714 return true;
18716 default:
18717 return false;
18720 emit_move_insn (target, const_vec);
18721 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18722 return true;
18725 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18726 all values variable, and none identical. */
18728 static void
18729 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18730 rtx target, rtx vals)
18732 enum machine_mode half_mode = GET_MODE_INNER (mode);
18733 rtx op0 = NULL, op1 = NULL;
18734 bool use_vec_concat = false;
18736 switch (mode)
18738 case V2SFmode:
18739 case V2SImode:
18740 if (!mmx_ok && !TARGET_SSE)
18741 break;
18742 /* FALLTHRU */
18744 case V2DFmode:
18745 case V2DImode:
18746 /* For the two element vectors, we always implement VEC_CONCAT. */
18747 op0 = XVECEXP (vals, 0, 0);
18748 op1 = XVECEXP (vals, 0, 1);
18749 use_vec_concat = true;
18750 break;
18752 case V4SFmode:
18753 half_mode = V2SFmode;
18754 goto half;
18755 case V4SImode:
18756 half_mode = V2SImode;
18757 goto half;
18758 half:
18760 rtvec v;
18762 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18763 Recurse to load the two halves. */
18765 op0 = gen_reg_rtx (half_mode);
18766 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18767 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18769 op1 = gen_reg_rtx (half_mode);
18770 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18771 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18773 use_vec_concat = true;
18775 break;
18777 case V8HImode:
18778 case V16QImode:
18779 case V4HImode:
18780 case V8QImode:
18781 break;
18783 default:
18784 gcc_unreachable ();
18787 if (use_vec_concat)
18789 if (!register_operand (op0, half_mode))
18790 op0 = force_reg (half_mode, op0);
18791 if (!register_operand (op1, half_mode))
18792 op1 = force_reg (half_mode, op1);
18794 emit_insn (gen_rtx_SET (VOIDmode, target,
18795 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18797 else
18799 int i, j, n_elts, n_words, n_elt_per_word;
18800 enum machine_mode inner_mode;
18801 rtx words[4], shift;
18803 inner_mode = GET_MODE_INNER (mode);
18804 n_elts = GET_MODE_NUNITS (mode);
18805 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18806 n_elt_per_word = n_elts / n_words;
18807 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
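/* Build each word of the vector in an integer register.  On a 32-bit
   target a V8HImode vector, for example, is assembled as four SImode
   words, each packing two elements with the higher-numbered element in
   the more significant half.  */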
18809 for (i = 0; i < n_words; ++i)
18811 rtx word = NULL_RTX;
18813 for (j = 0; j < n_elt_per_word; ++j)
18815 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18816 elt = convert_modes (word_mode, inner_mode, elt, true);
18818 if (j == 0)
18819 word = elt;
18820 else
18822 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18823 word, 1, OPTAB_LIB_WIDEN);
18824 word = expand_simple_binop (word_mode, IOR, word, elt,
18825 word, 1, OPTAB_LIB_WIDEN);
18829 words[i] = word;
18832 if (n_words == 1)
18833 emit_move_insn (target, gen_lowpart (mode, words[0]));
18834 else if (n_words == 2)
18836 rtx tmp = gen_reg_rtx (mode);
18837 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18838 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18839 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18840 emit_move_insn (target, tmp);
18842 else if (n_words == 4)
18844 rtx tmp = gen_reg_rtx (V4SImode);
18845 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18846 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18847 emit_move_insn (target, gen_lowpart (mode, tmp));
18849 else
18850 gcc_unreachable ();
18854 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18855 instructions unless MMX_OK is true. */
18857 void
18858 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18860 enum machine_mode mode = GET_MODE (target);
18861 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18862 int n_elts = GET_MODE_NUNITS (mode);
18863 int n_var = 0, one_var = -1;
18864 bool all_same = true, all_const_zero = true;
18865 int i;
18866 rtx x;
18868 for (i = 0; i < n_elts; ++i)
18870 x = XVECEXP (vals, 0, i);
18871 if (!CONSTANT_P (x))
18872 n_var++, one_var = i;
18873 else if (x != CONST0_RTX (inner_mode))
18874 all_const_zero = false;
18875 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18876 all_same = false;
18879 /* Constants are best loaded from the constant pool. */
18880 if (n_var == 0)
18882 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18883 return;
18886 /* If all values are identical, broadcast the value. */
18887 if (all_same
18888 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18889 XVECEXP (vals, 0, 0)))
18890 return;
18892 /* Values where only one field is non-constant are best loaded from
18893 the pool and overwritten via move later. */
18894 if (n_var == 1)
18896 if (all_const_zero
18897 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18898 XVECEXP (vals, 0, one_var),
18899 one_var))
18900 return;
18902 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18903 return;
18906 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18909 void
18910 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18912 enum machine_mode mode = GET_MODE (target);
18913 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18914 bool use_vec_merge = false;
18915 rtx tmp;
18917 switch (mode)
18919 case V2SFmode:
18920 case V2SImode:
18921 if (mmx_ok)
18923 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18924 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18925 if (elt == 0)
18926 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18927 else
18928 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18929 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18930 return;
18932 break;
18934 case V2DFmode:
18935 case V2DImode:
18937 rtx op0, op1;
18939 /* For the two element vectors, we implement a VEC_CONCAT with
18940 the extraction of the other element. */
18942 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18943 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18945 if (elt == 0)
18946 op0 = val, op1 = tmp;
18947 else
18948 op0 = tmp, op1 = val;
18950 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18951 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18953 return;
18955 case V4SFmode:
18956 switch (elt)
18958 case 0:
18959 use_vec_merge = true;
18960 break;
18962 case 1:
18963 /* tmp = target = A B C D */
18964 tmp = copy_to_reg (target);
18965 /* target = A A B B */
18966 emit_insn (gen_sse_unpcklps (target, target, target));
18967 /* target = X A B B */
18968 ix86_expand_vector_set (false, target, val, 0);
18969 /* target = A X C D */
18970 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18971 GEN_INT (1), GEN_INT (0),
18972 GEN_INT (2+4), GEN_INT (3+4)));
18973 return;
18975 case 2:
18976 /* tmp = target = A B C D */
18977 tmp = copy_to_reg (target);
18978 /* tmp = X B C D */
18979 ix86_expand_vector_set (false, tmp, val, 0);
18980 /* target = A B X D */
18981 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18982 GEN_INT (0), GEN_INT (1),
18983 GEN_INT (0+4), GEN_INT (3+4)));
18984 return;
18986 case 3:
18987 /* tmp = target = A B C D */
18988 tmp = copy_to_reg (target);
18989 /* tmp = X B C D */
18990 ix86_expand_vector_set (false, tmp, val, 0);
18991 /* target = A B C X */
18992 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18993 GEN_INT (0), GEN_INT (1),
18994 GEN_INT (2+4), GEN_INT (0+4)));
18995 return;
18997 default:
18998 gcc_unreachable ();
19000 break;
19002 case V4SImode:
19003 /* Element 0 handled by vec_merge below. */
19004 if (elt == 0)
19006 use_vec_merge = true;
19007 break;
19010 if (TARGET_SSE2)
19012 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19013 store into element 0, then shuffle them back. */
19015 rtx order[4];
19017 order[0] = GEN_INT (elt);
19018 order[1] = const1_rtx;
19019 order[2] = const2_rtx;
19020 order[3] = GEN_INT (3);
19021 order[elt] = const0_rtx;
19023 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19024 order[1], order[2], order[3]));
19026 ix86_expand_vector_set (false, target, val, 0);
19028 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19029 order[1], order[2], order[3]));
19031 else
19033 /* For SSE1, we have to reuse the V4SF code. */
19034 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19035 gen_lowpart (SFmode, val), elt);
19037 return;
19039 case V8HImode:
19040 use_vec_merge = TARGET_SSE2;
19041 break;
19042 case V4HImode:
19043 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19044 break;
19046 case V16QImode:
19047 case V8QImode:
19048 default:
19049 break;
19052 if (use_vec_merge)
19054 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19055 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19056 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19058 else
19060 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19062 emit_move_insn (mem, target);
19064 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19065 emit_move_insn (tmp, val);
19067 emit_move_insn (target, mem);
19071 void
19072 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19074 enum machine_mode mode = GET_MODE (vec);
19075 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19076 bool use_vec_extr = false;
19077 rtx tmp;
19079 switch (mode)
19081 case V2SImode:
19082 case V2SFmode:
19083 if (!mmx_ok)
19084 break;
19085 /* FALLTHRU */
19087 case V2DFmode:
19088 case V2DImode:
19089 use_vec_extr = true;
19090 break;
19092 case V4SFmode:
19093 switch (elt)
19095 case 0:
19096 tmp = vec;
19097 break;
19099 case 1:
19100 case 3:
19101 tmp = gen_reg_rtx (mode);
19102 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19103 GEN_INT (elt), GEN_INT (elt),
19104 GEN_INT (elt+4), GEN_INT (elt+4)));
19105 break;
19107 case 2:
19108 tmp = gen_reg_rtx (mode);
19109 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19110 break;
19112 default:
19113 gcc_unreachable ();
19115 vec = tmp;
19116 use_vec_extr = true;
19117 elt = 0;
19118 break;
19120 case V4SImode:
19121 if (TARGET_SSE2)
19123 switch (elt)
19125 case 0:
19126 tmp = vec;
19127 break;
19129 case 1:
19130 case 3:
19131 tmp = gen_reg_rtx (mode);
19132 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19133 GEN_INT (elt), GEN_INT (elt),
19134 GEN_INT (elt), GEN_INT (elt)));
19135 break;
19137 case 2:
19138 tmp = gen_reg_rtx (mode);
19139 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19140 break;
19142 default:
19143 gcc_unreachable ();
19145 vec = tmp;
19146 use_vec_extr = true;
19147 elt = 0;
19149 else
19151 /* For SSE1, we have to reuse the V4SF code. */
19152 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19153 gen_lowpart (V4SFmode, vec), elt);
19154 return;
19156 break;
19158 case V8HImode:
19159 use_vec_extr = TARGET_SSE2;
19160 break;
19161 case V4HImode:
19162 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19163 break;
19165 case V16QImode:
19166 case V8QImode:
19167 /* ??? Could extract the appropriate HImode element and shift. */
19168 default:
19169 break;
19172 if (use_vec_extr)
19174 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19175 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19177 /* Let the rtl optimizers know about the zero extension performed. */
19178 if (inner_mode == HImode)
19180 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19181 target = gen_lowpart (SImode, target);
19184 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19186 else
19188 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19190 emit_move_insn (mem, vec);
19192 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19193 emit_move_insn (target, tmp);
19197 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19198 pattern to reduce; DEST is the destination; IN is the input vector. */
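/* E.g. with FN generating addps and IN = { a, b, c, d }: movhlps forms
   { c, d, c, d }, the first FN gives { a+c, b+d, ... }, the shufps
   broadcasts element 1 (b+d), and the final FN leaves (a+c) + (b+d) in
   element 0 of DEST; the remaining elements are don't-cares.  */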
19200 void
19201 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19203 rtx tmp1, tmp2, tmp3;
19205 tmp1 = gen_reg_rtx (V4SFmode);
19206 tmp2 = gen_reg_rtx (V4SFmode);
19207 tmp3 = gen_reg_rtx (V4SFmode);
19209 emit_insn (gen_sse_movhlps (tmp1, in, in));
19210 emit_insn (fn (tmp2, tmp1, in));
19212 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19213 GEN_INT (1), GEN_INT (1),
19214 GEN_INT (1+4), GEN_INT (1+4)));
19215 emit_insn (fn (dest, tmp2, tmp3));
19218 /* Target hook for scalar_mode_supported_p. */
19219 static bool
19220 ix86_scalar_mode_supported_p (enum machine_mode mode)
19222 if (DECIMAL_FLOAT_MODE_P (mode))
19223 return true;
19224 else
19225 return default_scalar_mode_supported_p (mode);
19228 /* Implements target hook vector_mode_supported_p. */
19229 static bool
19230 ix86_vector_mode_supported_p (enum machine_mode mode)
19232 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19233 return true;
19234 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19235 return true;
19236 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19237 return true;
19238 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19239 return true;
19240 return false;
19243 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19245 We do this in the new i386 backend to maintain source compatibility
19246 with the old cc0-based compiler. */
19248 static tree
19249 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19250 tree inputs ATTRIBUTE_UNUSED,
19251 tree clobbers)
19253 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19254 clobbers);
19255 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19256 clobbers);
19257 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19258 clobbers);
19259 return clobbers;
19262 /* Return true if this goes in large data/bss. */
19264 static bool
19265 ix86_in_large_data_p (tree exp)
19267 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19268 return false;
19270 /* Functions are never large data. */
19271 if (TREE_CODE (exp) == FUNCTION_DECL)
19272 return false;
19274 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19276 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19277 if (strcmp (section, ".ldata") == 0
19278 || strcmp (section, ".lbss") == 0)
19279 return true;
19280 return false;
19282 else
19284 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19286 /* If this is an incomplete type with size 0, then we can't put it
19287 in data because it might be too big when completed. */
19288 if (!size || size > ix86_section_threshold)
19289 return true;
19292 return false;
19294 static void
19295 ix86_encode_section_info (tree decl, rtx rtl, int first)
19297 default_encode_section_info (decl, rtl, first);
19299 if (TREE_CODE (decl) == VAR_DECL
19300 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19301 && ix86_in_large_data_p (decl))
19302 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19305 /* Worker function for REVERSE_CONDITION. */
19307 enum rtx_code
19308 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19310 return (mode != CCFPmode && mode != CCFPUmode
19311 ? reverse_condition (code)
19312 : reverse_condition_maybe_unordered (code));
19315 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19316 to OPERANDS[0]. */
19318 const char *
19319 output_387_reg_move (rtx insn, rtx *operands)
19321 if (REG_P (operands[1])
19322 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19324 if (REGNO (operands[0]) == FIRST_STACK_REG)
19325 return output_387_ffreep (operands, 0);
19326 return "fstp\t%y0";
19328 if (STACK_TOP_P (operands[0]))
19329 return "fld%z1\t%y1";
19330 return "fst\t%y0";
19333 /* Output code to perform a conditional jump to LABEL if the C2 flag in the
19334 FP status register is set.  */
19336 void
19337 ix86_emit_fp_unordered_jump (rtx label)
19339 rtx reg = gen_reg_rtx (HImode);
19340 rtx temp;
19342 emit_insn (gen_x86_fnstsw_1 (reg));
19344 if (TARGET_USE_SAHF)
19346 emit_insn (gen_x86_sahf_1 (reg));
19348 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19349 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19351 else
19353 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19355 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19356 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19359 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19360 gen_rtx_LABEL_REF (VOIDmode, label),
19361 pc_rtx);
19362 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19363 emit_jump_insn (temp);
19366 /* Output code to perform a log1p XFmode calculation. */
19368 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19370 rtx label1 = gen_label_rtx ();
19371 rtx label2 = gen_label_rtx ();
19373 rtx tmp = gen_reg_rtx (XFmode);
19374 rtx tmp2 = gen_reg_rtx (XFmode);
19376 emit_insn (gen_absxf2 (tmp, op1));
19377 emit_insn (gen_cmpxf (tmp,
19378 CONST_DOUBLE_FROM_REAL_VALUE (
19379 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19380 XFmode)));
19381 emit_jump_insn (gen_bge (label1));
19383 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19384 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19385 emit_jump (label2);
19387 emit_label (label1);
19388 emit_move_insn (tmp, CONST1_RTX (XFmode));
19389 emit_insn (gen_addxf3 (tmp, op1, tmp));
19390 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19391 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19393 emit_label (label2);
19396 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19398 static void
19399 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19400 tree decl)
19402 /* With Binutils 2.15, the "@unwind" marker must be specified on
19403 every occurrence of the ".eh_frame" section, not just the first
19404 one. */
19405 if (TARGET_64BIT
19406 && strcmp (name, ".eh_frame") == 0)
19408 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19409 flags & SECTION_WRITE ? "aw" : "a");
19410 return;
19412 default_elf_asm_named_section (name, flags, decl);
19415 /* Return the mangling of TYPE if it is an extended fundamental type. */
19417 static const char *
19418 ix86_mangle_fundamental_type (tree type)
19420 switch (TYPE_MODE (type))
19422 case TFmode:
19423 /* __float128 is "g". */
19424 return "g";
19425 case XFmode:
19426 /* "long double" or __float80 is "e". */
19427 return "e";
19428 default:
19429 return NULL;
19433 /* For 32-bit code we can save PIC register setup by using the
19434 hidden function __stack_chk_fail_local instead of calling
19435 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19436 register, so it is better to call __stack_chk_fail directly.  */
19438 static tree
19439 ix86_stack_protect_fail (void)
19441 return TARGET_64BIT
19442 ? default_external_stack_protect_fail ()
19443 : default_hidden_stack_protect_fail ();
19446 /* Select a format to encode pointers in exception handling data. CODE
19447 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19448 true if the symbol may be affected by dynamic relocations.
19450 ??? All x86 object file formats are capable of representing this.
19451 After all, the relocation needed is the same as for the call insn.
19452 Whether or not a particular assembler allows us to enter such, I
19453 guess we'll have to see. */
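/* E.g. 32-bit PIC code gets DW_EH_PE_pcrel | DW_EH_PE_sdata4, with
   DW_EH_PE_indirect added for symbols that may be affected by dynamic
   relocations; non-PIC code gets DW_EH_PE_udata4 for the small model
   (and for code in the medium model) and DW_EH_PE_absptr otherwise.  */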
19455 asm_preferred_eh_data_format (int code, int global)
19457 if (flag_pic)
19459 int type = DW_EH_PE_sdata8;
19460 if (!TARGET_64BIT
19461 || ix86_cmodel == CM_SMALL_PIC
19462 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19463 type = DW_EH_PE_sdata4;
19464 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19466 if (ix86_cmodel == CM_SMALL
19467 || (ix86_cmodel == CM_MEDIUM && code))
19468 return DW_EH_PE_udata4;
19469 return DW_EH_PE_absptr;
19472 /* Expand a copysign operation: copy the sign of SIGN onto the positive
19473 value ABS_VALUE, storing the result in RESULT.  If MASK is non-null, it
19474 must be a mask that masks out the sign bit.  */
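/* When MASK comes from ix86_expand_sse_fabs it has every bit set except
   the sign bit, so its complement selects just the sign bit of SIGN,
   which is then ORed into ABS_VALUE.  */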
19475 static void
19476 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19478 enum machine_mode mode = GET_MODE (sign);
19479 rtx sgn = gen_reg_rtx (mode);
19480 if (mask == NULL_RTX)
19482 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19483 if (!VECTOR_MODE_P (mode))
19485 /* We need to generate a scalar mode mask in this case. */
19486 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19487 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19488 mask = gen_reg_rtx (mode);
19489 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19492 else
19493 mask = gen_rtx_NOT (mode, mask);
19494 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19495 gen_rtx_AND (mode, mask, sign)));
19496 emit_insn (gen_rtx_SET (VOIDmode, result,
19497 gen_rtx_IOR (mode, abs_value, sgn)));
19500 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19501 mask for masking out the sign-bit is stored in *SMASK, if that is
19502 non-null. */
19503 static rtx
19504 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19506 enum machine_mode mode = GET_MODE (op0);
19507 rtx xa, mask;
19509 xa = gen_reg_rtx (mode);
19510 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19511 if (!VECTOR_MODE_P (mode))
19513 /* We need to generate a scalar mode mask in this case. */
19514 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19515 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19516 mask = gen_reg_rtx (mode);
19517 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19519 emit_insn (gen_rtx_SET (VOIDmode, xa,
19520 gen_rtx_AND (mode, op0, mask)));
19522 if (smask)
19523 *smask = mask;
19525 return xa;
19528 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19529 swapping the operands if SWAP_OPERANDS is true. The expanded
19530 code is a forward jump to a newly created label in case the
19531 comparison is true. The generated label rtx is returned. */
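/* E.g. the rounding expanders below call
   ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false), which jumps
   to the label exactly when !isless (xa, TWO52), i.e. when xa is already
   integer-valued or a NaN.  */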
19532 static rtx
19533 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19534 bool swap_operands)
19536 rtx label, tmp;
19538 if (swap_operands)
19540 tmp = op0;
19541 op0 = op1;
19542 op1 = tmp;
19545 label = gen_label_rtx ();
19546 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19547 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19548 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19549 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19550 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19551 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19552 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19553 JUMP_LABEL (tmp) = label;
19555 return label;
19558 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19559 using comparison code CODE. Operands are swapped for the comparison if
19560 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19561 static rtx
19562 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19563 bool swap_operands)
19565 enum machine_mode mode = GET_MODE (op0);
19566 rtx mask = gen_reg_rtx (mode);
19568 if (swap_operands)
19570 rtx tmp = op0;
19571 op0 = op1;
19572 op1 = tmp;
19575 if (mode == DFmode)
19576 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19577 gen_rtx_fmt_ee (code, mode, op0, op1)));
19578 else
19579 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19580 gen_rtx_fmt_ee (code, mode, op0, op1)));
19582 return mask;
19585 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19586 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
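/* For DFmode this is 2**52 = 4503599627370496.0.  Adding and then
   subtracting it forces any value with |x| < 2**52 to the nearest integer
   in the current rounding mode, which is how the expanders below round
   without an integer conversion.  */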
19587 static rtx
19588 ix86_gen_TWO52 (enum machine_mode mode)
19590 REAL_VALUE_TYPE TWO52r;
19591 rtx TWO52;
19593 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19594 TWO52 = const_double_from_real_value (TWO52r, mode);
19595 TWO52 = force_reg (mode, TWO52);
19597 return TWO52;
19600 /* Expand SSE sequence for computing lround from OP1 storing
19601 into OP0. */
19602 void
19603 ix86_expand_lround (rtx op0, rtx op1)
19605 /* C code for the stuff we're doing below:
19606 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19607 return (long)tmp;
19609 enum machine_mode mode = GET_MODE (op1);
19610 const struct real_format *fmt;
19611 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19612 rtx adj;
19614 /* load nextafter (0.5, 0.0) */
19615 fmt = REAL_MODE_FORMAT (mode);
19616 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19617 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
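/* pred_half is the largest representable value below 0.5, i.e.
   0.5 - 2**-(p+1) where p is the precision of MODE.  Using it instead of
   a literal 0.5 keeps inputs just below a half-integer from being rounded
   up to the next integer by the addition.  */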
19619 /* adj = copysign (0.5, op1) */
19620 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19621 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19623 /* adj = op1 + adj */
19624 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
19626 /* op0 = (imode)adj */
19627 expand_fix (op0, adj, 0);
19630 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
19631 into OP0.  */
19632 void
19633 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19635 /* C code for the stuff we're doing below (for do_floor):
19636 xi = (long)op1;
19637 xi -= (double)xi > op1 ? 1 : 0;
19638 return xi;
19640 enum machine_mode fmode = GET_MODE (op1);
19641 enum machine_mode imode = GET_MODE (op0);
19642 rtx ireg, freg, label, tmp;
19644 /* reg = (long)op1 */
19645 ireg = gen_reg_rtx (imode);
19646 expand_fix (ireg, op1, 0);
19648 /* freg = (double)reg */
19649 freg = gen_reg_rtx (fmode);
19650 expand_float (freg, ireg, 0);
19652 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19653 label = ix86_expand_sse_compare_and_jump (UNLE,
19654 freg, op1, !do_floor);
19655 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19656 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
19657 emit_move_insn (ireg, tmp);
19659 emit_label (label);
19660 LABEL_NUSES (label) = 1;
19662 emit_move_insn (op0, ireg);
19665 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19666 result in OPERAND0. */
19667 void
19668 ix86_expand_rint (rtx operand0, rtx operand1)
19670 /* C code for the stuff we're doing below:
19671 xa = fabs (operand1);
19672 if (!isless (xa, 2**52))
19673 return operand1;
19674 xa = xa + 2**52 - 2**52;
19675 return copysign (xa, operand1);
19677 enum machine_mode mode = GET_MODE (operand0);
19678 rtx res, xa, label, TWO52, mask;
19680 res = gen_reg_rtx (mode);
19681 emit_move_insn (res, operand1);
19683 /* xa = abs (operand1) */
19684 xa = ix86_expand_sse_fabs (res, &mask);
19686 /* if (!isless (xa, TWO52)) goto label; */
19687 TWO52 = ix86_gen_TWO52 (mode);
19688 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19690 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19691 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19693 ix86_sse_copysign_to_positive (res, xa, res, mask);
19695 emit_label (label);
19696 LABEL_NUSES (label) = 1;
19698 emit_move_insn (operand0, res);
19701 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19702 into OPERAND0. */
19703 void
19704 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19706 /* C code for the stuff we expand below.
19707 double xa = fabs (x), x2;
19708 if (!isless (xa, TWO52))
19709 return x;
19710 xa = xa + TWO52 - TWO52;
19711 x2 = copysign (xa, x);
19712 Compensate. Floor:
19713 if (x2 > x)
19714 x2 -= 1;
19715 Compensate. Ceil:
19716 if (x2 < x)
19717 x2 -= -1;
19718 return x2;
19720 enum machine_mode mode = GET_MODE (operand0);
19721 rtx xa, TWO52, tmp, label, one, res, mask;
19723 TWO52 = ix86_gen_TWO52 (mode);
19725 /* Temporary for holding the result, initialized to the input
19726 operand to ease control flow. */
19727 res = gen_reg_rtx (mode);
19728 emit_move_insn (res, operand1);
19730 /* xa = abs (operand1) */
19731 xa = ix86_expand_sse_fabs (res, &mask);
19733 /* if (!isless (xa, TWO52)) goto label; */
19734 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19736 /* xa = xa + TWO52 - TWO52; */
19737 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19738 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19740 /* xa = copysign (xa, operand1) */
19741 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19743 /* generate 1.0 or -1.0 */
19744 one = force_reg (mode,
19745 const_double_from_real_value (do_floor
19746 ? dconst1 : dconstm1, mode));
19748 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19749 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19750 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19751 gen_rtx_AND (mode, one, tmp)));
19752 /* We always need to subtract here to preserve signed zero. */
19753 tmp = expand_simple_binop (mode, MINUS,
19754 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19755 emit_move_insn (res, tmp);
19757 emit_label (label);
19758 LABEL_NUSES (label) = 1;
19760 emit_move_insn (operand0, res);
19763 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19764 into OPERAND0. */
19765 void
19766 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19768 /* C code for the stuff we expand below.
19769 double xa = fabs (x), x2;
19770 if (!isless (xa, TWO52))
19771 return x;
19772 x2 = (double)(long)x;
19773 Compensate. Floor:
19774 if (x2 > x)
19775 x2 -= 1;
19776 Compensate. Ceil:
19777 if (x2 < x)
19778 x2 += 1;
19779 if (HONOR_SIGNED_ZEROS (mode))
19780 return copysign (x2, x);
19781 return x2;
19783 enum machine_mode mode = GET_MODE (operand0);
19784 rtx xa, xi, TWO52, tmp, label, one, res, mask;
19786 TWO52 = ix86_gen_TWO52 (mode);
19788 /* Temporary for holding the result, initialized to the input
19789 operand to ease control flow. */
19790 res = gen_reg_rtx (mode);
19791 emit_move_insn (res, operand1);
19793 /* xa = abs (operand1) */
19794 xa = ix86_expand_sse_fabs (res, &mask);
19796 /* if (!isless (xa, TWO52)) goto label; */
19797 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19799 /* xa = (double)(long)x */
19800 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19801 expand_fix (xi, res, 0);
19802 expand_float (xa, xi, 0);
19804 /* generate 1.0 */
19805 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19807 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19808 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19809 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19810 gen_rtx_AND (mode, one, tmp)));
19811 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19812 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19813 emit_move_insn (res, tmp);
19815 if (HONOR_SIGNED_ZEROS (mode))
19816 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19818 emit_label (label);
19819 LABEL_NUSES (label) = 1;
19821 emit_move_insn (operand0, res);
19824 /* Expand SSE sequence for computing round from OPERAND1 storing
19825 into OPERAND0.  A sequence that works without relying on DImode truncation
19826 via cvttsd2siq, which is only available on 64-bit targets.  */
19827 void
19828 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19830 /* C code for the stuff we expand below.
19831 double xa = fabs (x), xa2, x2;
19832 if (!isless (xa, TWO52))
19833 return x;
19834 Using the absolute value and copying back sign makes
19835 -0.0 -> -0.0 correct.
19836 xa2 = xa + TWO52 - TWO52;
19837 Compensate.
19838 dxa = xa2 - xa;
19839 if (dxa <= -0.5)
19840 xa2 += 1;
19841 else if (dxa > 0.5)
19842 xa2 -= 1;
19843 x2 = copysign (xa2, x);
19844 return x2;
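For instance, for x = 2.5: xa2 = 2.5 + 2**52 - 2**52 = 2.0 (the tie
rounds to even), dxa = -0.5, so the compensation bumps xa2 to 3.0 and
the result is 3.0, i.e. halfway cases round away from zero.  */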
19846 enum machine_mode mode = GET_MODE (operand0);
19847 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19849 TWO52 = ix86_gen_TWO52 (mode);
19851 /* Temporary for holding the result, initialized to the input
19852 operand to ease control flow. */
19853 res = gen_reg_rtx (mode);
19854 emit_move_insn (res, operand1);
19856 /* xa = abs (operand1) */
19857 xa = ix86_expand_sse_fabs (res, &mask);
19859 /* if (!isless (xa, TWO52)) goto label; */
19860 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19862 /* xa2 = xa + TWO52 - TWO52; */
19863 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19864 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19866 /* dxa = xa2 - xa; */
19867 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
19869 /* generate 0.5, 1.0 and -0.5 */
19870 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19871 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
19872 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
19873 0, OPTAB_DIRECT);
19875 /* Compensate. */
19876 tmp = gen_reg_rtx (mode);
19877 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19878 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19879 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19880 gen_rtx_AND (mode, one, tmp)));
19881 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19882 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19883 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19884 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19885 gen_rtx_AND (mode, one, tmp)));
19886 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19888 /* res = copysign (xa2, operand1) */
19889 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19891 emit_label (label);
19892 LABEL_NUSES (label) = 1;
19894 emit_move_insn (operand0, res);
19897 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19898 into OPERAND0. */
19899 void
19900 ix86_expand_trunc (rtx operand0, rtx operand1)
19902 /* C code for SSE variant we expand below.
19903 double xa = fabs (x), x2;
19904 if (!isless (xa, TWO52))
19905 return x;
19906 x2 = (double)(long)x;
19907 if (HONOR_SIGNED_ZEROS (mode))
19908 return copysign (x2, x);
19909 return x2;
19911 enum machine_mode mode = GET_MODE (operand0);
19912 rtx xa, xi, TWO52, label, res, mask;
19914 TWO52 = ix86_gen_TWO52 (mode);
19916 /* Temporary for holding the result, initialized to the input
19917 operand to ease control flow. */
19918 res = gen_reg_rtx (mode);
19919 emit_move_insn (res, operand1);
19921 /* xa = abs (operand1) */
19922 xa = ix86_expand_sse_fabs (res, &mask);
19924 /* if (!isless (xa, TWO52)) goto label; */
19925 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19927 /* x = (double)(long)x */
19928 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19929 expand_fix (xi, res, 0);
19930 expand_float (res, xi, 0);
19932 if (HONOR_SIGNED_ZEROS (mode))
19933 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19935 emit_label (label);
19936 LABEL_NUSES (label) = 1;
19938 emit_move_insn (operand0, res);
19941 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0,
19942 without relying on DImode truncation via cvttsd2siq (64-bit only).  */
19943 void
19944 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19946 enum machine_mode mode = GET_MODE (operand0);
19947 rtx xa, mask, TWO52, label, one, res, smask, tmp;
19949 /* C code for SSE variant we expand below.
19950 double xa = fabs (x), x2;
19951 if (!isless (xa, TWO52))
19952 return x;
19953 xa2 = xa + TWO52 - TWO52;
19954 Compensate:
19955 if (xa2 > xa)
19956 xa2 -= 1.0;
19957 x2 = copysign (xa2, x);
19958 return x2;
19961 TWO52 = ix86_gen_TWO52 (mode);
19963 /* Temporary for holding the result, initialized to the input
19964 operand to ease control flow. */
19965 res = gen_reg_rtx (mode);
19966 emit_move_insn (res, operand1);
19968 /* xa = abs (operand1) */
19969 xa = ix86_expand_sse_fabs (res, &smask);
19971 /* if (!isless (xa, TWO52)) goto label; */
19972 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19974 /* res = xa + TWO52 - TWO52; */
19975 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19976 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
19977 emit_move_insn (res, tmp);
19979 /* generate 1.0 */
19980 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19982 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19983 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19984 emit_insn (gen_rtx_SET (VOIDmode, mask,
19985 gen_rtx_AND (mode, mask, one)));
19986 tmp = expand_simple_binop (mode, MINUS,
19987 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
19988 emit_move_insn (res, tmp);
19990 /* res = copysign (res, operand1) */
19991 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
19993 emit_label (label);
19994 LABEL_NUSES (label) = 1;
19996 emit_move_insn (operand0, res);
19999 /* Expand SSE sequence for computing round from OPERAND1 storing
20000 into OPERAND0. */
20001 void
20002 ix86_expand_round (rtx operand0, rtx operand1)
20004 /* C code for the stuff we're doing below:
20005 double xa = fabs (x);
20006 if (!isless (xa, TWO52))
20007 return x;
20008 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20009 return copysign (xa, x);
20011 enum machine_mode mode = GET_MODE (operand0);
20012 rtx res, TWO52, xa, label, xi, half, mask;
20013 const struct real_format *fmt;
20014 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20016 /* Temporary for holding the result, initialized to the input
20017 operand to ease control flow. */
20018 res = gen_reg_rtx (mode);
20019 emit_move_insn (res, operand1);
20021 TWO52 = ix86_gen_TWO52 (mode);
20022 xa = ix86_expand_sse_fabs (res, &mask);
20023 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20025 /* load nextafter (0.5, 0.0) */
20026 fmt = REAL_MODE_FORMAT (mode);
20027 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20028 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20030 /* xa = xa + 0.5 */
20031 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
20032 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
20034 /* xa = (double)(int64_t)xa */
20035 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20036 expand_fix (xi, xa, 0);
20037 expand_float (xa, xi, 0);
20039 /* res = copysign (xa, operand1) */
20040 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
20042 emit_label (label);
20043 LABEL_NUSES (label) = 1;
20045 emit_move_insn (operand0, res);
20048 #include "gt-i386.h"