2006-11-13 H.J. Lu <hongjiu.lu@intel.com>
[official-gcc.git] / gcc / config / i386 / i386.c
blob a9784dde65d1afa5535e888462db67a12bc95afd
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
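/* A minimal usage sketch (assuming the mult_init and mult_bit fields of
   struct processor_costs declared in i386.h): the rtx cost code below looks
   up the per-mode tables roughly as

     cost = ix86_cost->mult_init[MODE_INDEX (mode)]
            + nbits * ix86_cost->mult_bit;

   where nbits counts the set bits of a constant multiplier.  This is an
   illustrative sketch, not a quote of the exact expression used.  */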
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
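/* Quick arithmetic check of the convention stated above: with
   COSTS_N_INSNS (N) == (N) * 4, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a 2-byte add carries the same weight as one average-cost insn and the
   size and speed cost tables stay on comparable scales.  */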
71 static const
72 struct processor_costs size_cost = { /* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
90 0, /* "large" insn */
91 2, /* MOVE_RATIO */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
115 2, /* Branch cost */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
122 };
124 /* Processor costs (relative to an add) */
125 static const
126 struct processor_costs i386_cost = { /* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
145 3, /* MOVE_RATIO */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
169 1, /* Branch cost */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
176 };
178 static const
179 struct processor_costs i486_cost = { /* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
198 3, /* MOVE_RATIO */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
222 1, /* Branch cost */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
229 };
231 static const
232 struct processor_costs pentium_cost = {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
251 6, /* MOVE_RATIO */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
275 2, /* Branch cost */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
282 };
284 static const
285 struct processor_costs pentiumpro_cost = {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
304 6, /* MOVE_RATIO */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
328 2, /* Branch cost */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
335 };
337 static const
338 struct processor_costs geode_cost = {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
357 4, /* MOVE_RATIO */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
382 1, /* Branch cost */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
389 };
391 static const
392 struct processor_costs k6_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
411 4, /* MOVE_RATIO */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
435 1, /* Branch cost */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
442 };
444 static const
445 struct processor_costs athlon_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
464 9, /* MOVE_RATIO */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
488 5, /* Branch cost */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
495 };
497 static const
498 struct processor_costs k8_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
517 9, /* MOVE_RATIO */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
540 /* New AMD processors never drop prefetches; if they cannot be performed
541 immediately, they are queued. We set the number of simultaneous prefetches
542 to a large constant to reflect this (it probably is not a good idea not to
543 limit the number of prefetches at all, as their execution also takes some
544 time). */
545 100, /* number of parallel prefetches */
546 5, /* Branch cost */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 };
555 static const
556 struct processor_costs pentium4_cost = {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
575 6, /* MOVE_RATIO */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
599 2, /* Branch cost */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
606 };
608 static const
609 struct processor_costs nocona_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
628 17, /* MOVE_RATIO */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
652 1, /* Branch cost */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
659 };
661 /* Generic64 should produce code tuned for Nocona and K8. */
662 static const
663 struct processor_costs generic64_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 /* On all chips taken into consideration, lea is 2 cycles or more. With
666 this cost, however, our current implementation of synth_mult results in
667 the use of unnecessary temporary registers, causing regressions on several
668 SPECfp benchmarks. */
669 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
670 COSTS_N_INSNS (1), /* variable shift costs */
671 COSTS_N_INSNS (1), /* constant shift costs */
672 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
673 COSTS_N_INSNS (4), /* HI */
674 COSTS_N_INSNS (3), /* SI */
675 COSTS_N_INSNS (4), /* DI */
676 COSTS_N_INSNS (2)}, /* other */
677 0, /* cost of multiply per each bit set */
678 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
679 COSTS_N_INSNS (26), /* HI */
680 COSTS_N_INSNS (42), /* SI */
681 COSTS_N_INSNS (74), /* DI */
682 COSTS_N_INSNS (74)}, /* other */
683 COSTS_N_INSNS (1), /* cost of movsx */
684 COSTS_N_INSNS (1), /* cost of movzx */
685 8, /* "large" insn */
686 17, /* MOVE_RATIO */
687 4, /* cost for loading QImode using movzbl */
688 {4, 4, 4}, /* cost of loading integer registers
689 in QImode, HImode and SImode.
690 Relative to reg-reg move (2). */
691 {4, 4, 4}, /* cost of storing integer registers */
692 4, /* cost of reg,reg fld/fst */
693 {12, 12, 12}, /* cost of loading fp registers
694 in SFmode, DFmode and XFmode */
695 {6, 6, 8}, /* cost of storing fp registers
696 in SFmode, DFmode and XFmode */
697 2, /* cost of moving MMX register */
698 {8, 8}, /* cost of loading MMX registers
699 in SImode and DImode */
700 {8, 8}, /* cost of storing MMX registers
701 in SImode and DImode */
702 2, /* cost of moving SSE register */
703 {8, 8, 8}, /* cost of loading SSE registers
704 in SImode, DImode and TImode */
705 {8, 8, 8}, /* cost of storing SSE registers
706 in SImode, DImode and TImode */
707 5, /* MMX or SSE register to integer */
708 64, /* size of prefetch block */
709 6, /* number of parallel prefetches */
710 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
711 value is increased to the perhaps more appropriate value of 5. */
712 3, /* Branch cost */
713 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
714 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
715 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
716 COSTS_N_INSNS (8), /* cost of FABS instruction. */
717 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
718 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
719 };
721 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
722 static const
723 struct processor_costs generic32_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (4), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (4), /* DI */
732 COSTS_N_INSNS (2)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (26), /* HI */
736 COSTS_N_INSNS (42), /* SI */
737 COSTS_N_INSNS (74), /* DI */
738 COSTS_N_INSNS (74)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
742 17, /* MOVE_RATIO */
743 4, /* cost for loading QImode using movzbl */
744 {4, 4, 4}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 4, /* cost of reg,reg fld/fst */
749 {12, 12, 12}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {6, 6, 8}, /* cost of storing fp registers
752 in SFmode, DFmode and XFmode */
753 2, /* cost of moving MMX register */
754 {8, 8}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {8, 8}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {8, 8, 8}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {8, 8, 8}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 5, /* MMX or SSE register to integer */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
766 3, /* Branch cost */
767 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (8), /* cost of FABS instruction. */
771 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
773 };
775 const struct processor_costs *ix86_cost = &pentium_cost;
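/* This initializer is only the default; the option-handling code (e.g.
   override_options) is expected to repoint ix86_cost at the table matching
   the selected -mtune processor, such as &k8_cost for -mtune=k8.  */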
777 /* Processor feature/optimization bitmasks. */
778 #define m_386 (1<<PROCESSOR_I386)
779 #define m_486 (1<<PROCESSOR_I486)
780 #define m_PENT (1<<PROCESSOR_PENTIUM)
781 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
782 #define m_GEODE (1<<PROCESSOR_GEODE)
783 #define m_K6_GEODE (m_K6 | m_GEODE)
784 #define m_K6 (1<<PROCESSOR_K6)
785 #define m_ATHLON (1<<PROCESSOR_ATHLON)
786 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
787 #define m_K8 (1<<PROCESSOR_K8)
788 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
789 #define m_NOCONA (1<<PROCESSOR_NOCONA)
790 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
791 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
792 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
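/* Sketch of how the tuning bitmasks below are consumed (assuming the
   TUNEMASK and TARGET_* wrappers defined in i386.h):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   i.e. a flag is enabled whenever its mask has the bit of the processor
   currently being tuned for set.  */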
794 /* Generic instruction choice should be a common subset of supported CPUs
795 (PPro/PENT4/NOCONA/Athlon/K8). */
797 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling it
798 for Generic64 seems like a good code-size tradeoff. We can't enable it for
799 32-bit generic because it does not work well with PPro-based chips. */
800 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
801 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
802 const int x86_zero_extend_with_and = m_486 | m_PENT;
803 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
804 const int x86_double_with_add = ~m_386;
805 const int x86_use_bit_test = m_386;
806 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
807 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
808 const int x86_3dnow_a = m_ATHLON_K8;
809 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
810 /* Branch hints were put in P4 based on simulation results. But
811 after P4 was made, no performance benefit was observed with
812 branch hints. They also increase the code size. As a result,
813 icc never generates branch hints. */
814 const int x86_branch_hints = 0;
815 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
816 /* We probably ought to watch for partial register stalls on the Generic32
817 compilation setting as well. However, in the current implementation the
818 partial register stalls are not eliminated very well - they can
819 be introduced via subregs synthesized by combine and can happen
820 in caller/callee saving sequences.
821 Because this option pays back little on PPro-based chips and is in conflict
822 with the partial register dependencies used by Athlon/P4-based chips, it is
823 better to leave it off for generic32 for now. */
824 const int x86_partial_reg_stall = m_PPRO;
825 const int x86_partial_flag_reg_stall = m_GENERIC;
826 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
827 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
828 const int x86_use_mov0 = m_K6;
829 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
830 const int x86_read_modify_write = ~m_PENT;
831 const int x86_read_modify = ~(m_PENT | m_PPRO);
832 const int x86_split_long_moves = m_PPRO;
833 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
834 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
835 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
836 const int x86_qimode_math = ~(0);
837 const int x86_promote_qi_regs = 0;
838 /* On PPro this flag is meant to avoid partial register stalls. Just like
839 x86_partial_reg_stall, this option might be considered for Generic32
840 if our scheme for avoiding partial stalls were more effective. */
841 const int x86_himode_math = ~(m_PPRO);
842 const int x86_promote_hi_regs = m_PPRO;
843 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
844 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
845 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
846 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
847 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
848 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
850 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
851 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
852 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
853 const int x86_shift1 = ~m_486;
854 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
855 /* In the Generic model we have a conflict here between PPro/Pentium4-based
856 chips that treat 128-bit SSE registers as single units and K8-based chips
857 that divide SSE registers into two 64-bit halves.
858 x86_sse_partial_reg_dependency promotes all store destinations to be 128-bit
859 to allow register renaming on 128-bit SSE units, but usually results in one
860 extra micro-op on 64-bit SSE units. Experimental results show that disabling
861 this option on P4 brings an over 20% SPECfp regression, while enabling it on
862 K8 brings roughly a 2.4% regression that can be partly masked by careful
863 scheduling of moves. */
864 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
865 /* Set for machines where the type and dependencies are resolved on SSE
866 register parts instead of whole registers, so we may maintain just the
867 lower part of scalar values in the proper format, leaving the upper part
868 undefined. */
869 const int x86_sse_split_regs = m_ATHLON_K8;
870 const int x86_sse_typeless_stores = m_ATHLON_K8;
871 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
872 const int x86_use_ffreep = m_ATHLON_K8;
873 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
874 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
876 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
877 integer data in xmm registers, which results in pretty abysmal code. */
878 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
880 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
881 /* Some CPU cores are not able to predict more than 4 branch instructions in
882 the 16 byte window. */
883 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
884 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
885 const int x86_use_bt = m_ATHLON_K8;
886 /* Compare and exchange was added for 80486. */
887 const int x86_cmpxchg = ~m_386;
888 /* Compare and exchange 8 bytes was added for pentium. */
889 const int x86_cmpxchg8b = ~(m_386 | m_486);
890 /* Compare and exchange 16 bytes was added for nocona. */
891 const int x86_cmpxchg16b = m_NOCONA;
892 /* Exchange and add was added for 80486. */
893 const int x86_xadd = ~m_386;
894 /* Byteswap was added for 80486. */
895 const int x86_bswap = ~m_386;
896 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
898 /* In case the average insn count for single function invocation is
899 lower than this constant, emit fast (but longer) prologue and
900 epilogue code. */
901 #define FAST_PROLOGUE_INSN_COUNT 20
903 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
904 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
905 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
906 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
908 /* Array of the smallest class containing reg number REGNO, indexed by
909 REGNO. Used by REGNO_REG_CLASS in i386.h. */
911 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
912 {
913 /* ax, dx, cx, bx */
914 AREG, DREG, CREG, BREG,
915 /* si, di, bp, sp */
916 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
917 /* FP registers */
918 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
919 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
920 /* arg pointer */
921 NON_Q_REGS,
922 /* flags, fpsr, fpcr, dirflag, frame */
923 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
924 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
925 SSE_REGS, SSE_REGS,
926 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
927 MMX_REGS, MMX_REGS,
928 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
929 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
930 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
931 SSE_REGS, SSE_REGS,
932 };
934 /* The "default" register map used in 32bit mode. */
936 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
937 {
938 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
939 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
940 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
941 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
942 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
943 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
944 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
945 };
947 static int const x86_64_int_parameter_registers[6] =
948 {
949 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
950 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
951 };
953 static int const x86_64_int_return_registers[4] =
954 {
955 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
956 };
958 /* The "default" register map used in 64bit mode. */
959 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
960 {
961 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
962 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
963 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
964 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
965 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
966 8,9,10,11,12,13,14,15, /* extended integer registers */
967 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
968 };
970 /* Define the register numbers to be used in Dwarf debugging information.
971 The SVR4 reference port C compiler uses the following register numbers
972 in its Dwarf output code:
973 0 for %eax (gcc regno = 0)
974 1 for %ecx (gcc regno = 2)
975 2 for %edx (gcc regno = 1)
976 3 for %ebx (gcc regno = 3)
977 4 for %esp (gcc regno = 7)
978 5 for %ebp (gcc regno = 6)
979 6 for %esi (gcc regno = 4)
980 7 for %edi (gcc regno = 5)
981 The following three DWARF register numbers are never generated by
982 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
983 believes these numbers have these meanings.
984 8 for %eip (no gcc equivalent)
985 9 for %eflags (gcc regno = 17)
986 10 for %trapno (no gcc equivalent)
987 It is not at all clear how we should number the FP stack registers
988 for the x86 architecture. If the version of SDB on x86/svr4 were
989 a bit less brain dead with respect to floating-point then we would
990 have a precedent to follow with respect to DWARF register numbers
991 for x86 FP registers, but the SDB on x86/svr4 is so completely
992 broken with respect to FP registers that it is hardly worth thinking
993 of it as something to strive for compatibility with.
994 The version of x86/svr4 SDB I have at the moment does (partially)
995 seem to believe that DWARF register number 11 is associated with
996 the x86 register %st(0), but that's about all. Higher DWARF
997 register numbers don't seem to be associated with anything in
998 particular, and even for DWARF regno 11, SDB only seems to under-
999 stand that it should say that a variable lives in %st(0) (when
1000 asked via an `=' command) if we said it was in DWARF regno 11,
1001 but SDB still prints garbage when asked for the value of the
1002 variable in question (via a `/' command).
1003 (Also note that the labels SDB prints for various FP stack regs
1004 when doing an `x' command are all wrong.)
1005 Note that these problems generally don't affect the native SVR4
1006 C compiler because it doesn't allow the use of -O with -g and
1007 because when it is *not* optimizing, it allocates a memory
1008 location for each floating-point variable, and the memory
1009 location is what gets described in the DWARF AT_location
1010 attribute for the variable in question.
1011 Regardless of the severe mental illness of the x86/svr4 SDB, we
1012 do something sensible here and we use the following DWARF
1013 register numbers. Note that these are all stack-top-relative
1014 numbers.
1015 11 for %st(0) (gcc regno = 8)
1016 12 for %st(1) (gcc regno = 9)
1017 13 for %st(2) (gcc regno = 10)
1018 14 for %st(3) (gcc regno = 11)
1019 15 for %st(4) (gcc regno = 12)
1020 16 for %st(5) (gcc regno = 13)
1021 17 for %st(6) (gcc regno = 14)
1022 18 for %st(7) (gcc regno = 15)
1024 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1025 {
1026 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1027 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1028 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1029 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1030 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1031 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1032 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1033 };
1035 /* Test and compare insns in i386.md store the information needed to
1036 generate branch and scc insns here. */
1038 rtx ix86_compare_op0 = NULL_RTX;
1039 rtx ix86_compare_op1 = NULL_RTX;
1040 rtx ix86_compare_emitted = NULL_RTX;
1042 /* Size of the register save area. */
1043 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
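/* Worked size, assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: 6 * 8 + 8 * 16 = 176 bytes,
   which matches the register save area the x86-64 psABI reserves for
   varargs.  */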
1045 /* Define the structure for the machine field in struct function. */
1047 struct stack_local_entry GTY(())
1048 {
1049 unsigned short mode;
1050 unsigned short n;
1051 rtx rtl;
1052 struct stack_local_entry *next;
1053 };
1055 /* Structure describing stack frame layout.
1056 Stack grows downward:
1058 [arguments]
1059 <- ARG_POINTER
1060 saved pc
1062 saved frame pointer if frame_pointer_needed
1063 <- HARD_FRAME_POINTER
1064 [saved regs]
1066 [padding1] \
1068 [va_arg registers] (
1069 > to_allocate <- FRAME_POINTER
1070 [frame] (
1072 [padding2] /
1073 */
1074 struct ix86_frame
1075 {
1076 int nregs;
1077 int padding1;
1078 int va_arg_size;
1079 HOST_WIDE_INT frame;
1080 int padding2;
1081 int outgoing_arguments_size;
1082 int red_zone_size;
1084 HOST_WIDE_INT to_allocate;
1085 /* The offsets relative to ARG_POINTER. */
1086 HOST_WIDE_INT frame_pointer_offset;
1087 HOST_WIDE_INT hard_frame_pointer_offset;
1088 HOST_WIDE_INT stack_pointer_offset;
1090 /* When save_regs_using_mov is set, emit prologue using
1091 move instead of push instructions. */
1092 bool save_regs_using_mov;
1093 };
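/* Nothing here computes these fields; ix86_compute_frame_layout (declared
   later in this file) fills in a struct ix86_frame for the current function,
   and the prologue/epilogue code then consults it.  */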
1095 /* Code model option. */
1096 enum cmodel ix86_cmodel;
1097 /* Asm dialect. */
1098 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1099 /* TLS dialects. */
1100 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1102 /* Which unit we are generating floating point math for. */
1103 enum fpmath_unit ix86_fpmath;
1105 /* Which cpu are we scheduling for. */
1106 enum processor_type ix86_tune;
1107 /* Which instruction set architecture to use. */
1108 enum processor_type ix86_arch;
1110 /* true if sse prefetch instruction is not NOOP. */
1111 int x86_prefetch_sse;
1113 /* ix86_regparm_string as a number */
1114 static int ix86_regparm;
1116 /* -mstackrealign option */
1117 extern int ix86_force_align_arg_pointer;
1118 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1120 /* Preferred alignment for stack boundary in bits. */
1121 unsigned int ix86_preferred_stack_boundary;
1123 /* Values 1-5: see jump.c */
1124 int ix86_branch_cost;
1126 /* Variables which are this size or smaller are put in the data/bss
1127 or ldata/lbss sections. */
1129 int ix86_section_threshold = 65536;
1131 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1132 char internal_label_prefix[16];
1133 int internal_label_prefix_len;
1135 static bool ix86_handle_option (size_t, const char *, int);
1136 static void output_pic_addr_const (FILE *, rtx, int);
1137 static void put_condition_code (enum rtx_code, enum machine_mode,
1138 int, int, FILE *);
1139 static const char *get_some_local_dynamic_name (void);
1140 static int get_some_local_dynamic_name_1 (rtx *, void *);
1141 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1142 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1143 rtx *);
1144 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1145 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1146 enum machine_mode);
1147 static rtx get_thread_pointer (int);
1148 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1149 static void get_pc_thunk_name (char [32], unsigned int);
1150 static rtx gen_push (rtx);
1151 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1152 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1153 static struct machine_function * ix86_init_machine_status (void);
1154 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1155 static int ix86_nsaved_regs (void);
1156 static void ix86_emit_save_regs (void);
1157 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1158 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1159 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1160 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1161 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1162 static rtx ix86_expand_aligntest (rtx, int);
1163 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1164 static int ix86_issue_rate (void);
1165 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1166 static int ia32_multipass_dfa_lookahead (void);
1167 static void ix86_init_mmx_sse_builtins (void);
1168 static rtx x86_this_parameter (tree);
1169 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1170 HOST_WIDE_INT, tree);
1171 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1172 static void x86_file_start (void);
1173 static void ix86_reorg (void);
1174 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1175 static tree ix86_build_builtin_va_list (void);
1176 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1177 tree, int *, int);
1178 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1179 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1180 static bool ix86_vector_mode_supported_p (enum machine_mode);
1182 static int ix86_address_cost (rtx);
1183 static bool ix86_cannot_force_const_mem (rtx);
1184 static rtx ix86_delegitimize_address (rtx);
1186 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1188 struct builtin_description;
1189 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1190 tree, rtx);
1191 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1192 tree, rtx);
1193 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1194 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1195 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1196 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1197 static rtx safe_vector_operand (rtx, enum machine_mode);
1198 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1199 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1200 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1201 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1202 static int ix86_fp_comparison_cost (enum rtx_code code);
1203 static unsigned int ix86_select_alt_pic_regnum (void);
1204 static int ix86_save_reg (unsigned int, int);
1205 static void ix86_compute_frame_layout (struct ix86_frame *);
1206 static int ix86_comp_type_attributes (tree, tree);
1207 static int ix86_function_regparm (tree, tree);
1208 const struct attribute_spec ix86_attribute_table[];
1209 static bool ix86_function_ok_for_sibcall (tree, tree);
1210 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1211 static int ix86_value_regno (enum machine_mode, tree, tree);
1212 static bool contains_128bit_aligned_vector_p (tree);
1213 static rtx ix86_struct_value_rtx (tree, int);
1214 static bool ix86_ms_bitfield_layout_p (tree);
1215 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1216 static int extended_reg_mentioned_1 (rtx *, void *);
1217 static bool ix86_rtx_costs (rtx, int, int, int *);
1218 static int min_insn_size (rtx);
1219 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1220 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1221 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1222 tree, bool);
1223 static void ix86_init_builtins (void);
1224 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1225 static const char *ix86_mangle_fundamental_type (tree);
1226 static tree ix86_stack_protect_fail (void);
1227 static rtx ix86_internal_arg_pointer (void);
1228 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1230 /* This function is only used on Solaris. */
1231 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1232 ATTRIBUTE_UNUSED;
1234 /* Register class used for passing given 64bit part of the argument.
1235 These represent classes as documented by the PS ABI, with the exception
1236 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1237 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1239 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1240 whenever possible (upper half does contain padding). */
1242 enum x86_64_reg_class
1243 {
1244 X86_64_NO_CLASS,
1245 X86_64_INTEGER_CLASS,
1246 X86_64_INTEGERSI_CLASS,
1247 X86_64_SSE_CLASS,
1248 X86_64_SSESF_CLASS,
1249 X86_64_SSEDF_CLASS,
1250 X86_64_SSEUP_CLASS,
1251 X86_64_X87_CLASS,
1252 X86_64_X87UP_CLASS,
1253 X86_64_COMPLEX_X87_CLASS,
1254 X86_64_MEMORY_CLASS
1255 };
1256 static const char * const x86_64_reg_class_name[] = {
1257 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1258 "sseup", "x87", "x87up", "cplx87", "no"
1259 };
1261 #define MAX_CLASSES 4
1263 /* Table of constants used by fldpi, fldln2, etc.... */
1264 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1265 static bool ext_80387_constants_init = 0;
1266 static void init_ext_80387_constants (void);
1267 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1268 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1269 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1270 static section *x86_64_elf_select_section (tree decl, int reloc,
1271 unsigned HOST_WIDE_INT align)
1272 ATTRIBUTE_UNUSED;
1274 /* Initialize the GCC target structure. */
1275 #undef TARGET_ATTRIBUTE_TABLE
1276 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1277 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1278 # undef TARGET_MERGE_DECL_ATTRIBUTES
1279 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1280 #endif
1282 #undef TARGET_COMP_TYPE_ATTRIBUTES
1283 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1285 #undef TARGET_INIT_BUILTINS
1286 #define TARGET_INIT_BUILTINS ix86_init_builtins
1287 #undef TARGET_EXPAND_BUILTIN
1288 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1290 #undef TARGET_ASM_FUNCTION_EPILOGUE
1291 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1293 #undef TARGET_ENCODE_SECTION_INFO
1294 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1295 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1296 #else
1297 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1298 #endif
1300 #undef TARGET_ASM_OPEN_PAREN
1301 #define TARGET_ASM_OPEN_PAREN ""
1302 #undef TARGET_ASM_CLOSE_PAREN
1303 #define TARGET_ASM_CLOSE_PAREN ""
1305 #undef TARGET_ASM_ALIGNED_HI_OP
1306 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1307 #undef TARGET_ASM_ALIGNED_SI_OP
1308 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1309 #ifdef ASM_QUAD
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1312 #endif
1314 #undef TARGET_ASM_UNALIGNED_HI_OP
1315 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1316 #undef TARGET_ASM_UNALIGNED_SI_OP
1317 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1318 #undef TARGET_ASM_UNALIGNED_DI_OP
1319 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1321 #undef TARGET_SCHED_ADJUST_COST
1322 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1323 #undef TARGET_SCHED_ISSUE_RATE
1324 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1325 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1326 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1327 ia32_multipass_dfa_lookahead
1329 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1330 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1332 #ifdef HAVE_AS_TLS
1333 #undef TARGET_HAVE_TLS
1334 #define TARGET_HAVE_TLS true
1335 #endif
1336 #undef TARGET_CANNOT_FORCE_CONST_MEM
1337 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1338 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1339 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1341 #undef TARGET_DELEGITIMIZE_ADDRESS
1342 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1344 #undef TARGET_MS_BITFIELD_LAYOUT_P
1345 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1347 #if TARGET_MACHO
1348 #undef TARGET_BINDS_LOCAL_P
1349 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1350 #endif
1352 #undef TARGET_ASM_OUTPUT_MI_THUNK
1353 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1354 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1355 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1357 #undef TARGET_ASM_FILE_START
1358 #define TARGET_ASM_FILE_START x86_file_start
1360 #undef TARGET_DEFAULT_TARGET_FLAGS
1361 #define TARGET_DEFAULT_TARGET_FLAGS \
1362 (TARGET_DEFAULT \
1363 | TARGET_64BIT_DEFAULT \
1364 | TARGET_SUBTARGET_DEFAULT \
1365 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1367 #undef TARGET_HANDLE_OPTION
1368 #define TARGET_HANDLE_OPTION ix86_handle_option
1370 #undef TARGET_RTX_COSTS
1371 #define TARGET_RTX_COSTS ix86_rtx_costs
1372 #undef TARGET_ADDRESS_COST
1373 #define TARGET_ADDRESS_COST ix86_address_cost
1375 #undef TARGET_FIXED_CONDITION_CODE_REGS
1376 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1377 #undef TARGET_CC_MODES_COMPATIBLE
1378 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1380 #undef TARGET_MACHINE_DEPENDENT_REORG
1381 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1383 #undef TARGET_BUILD_BUILTIN_VA_LIST
1384 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1386 #undef TARGET_MD_ASM_CLOBBERS
1387 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1389 #undef TARGET_PROMOTE_PROTOTYPES
1390 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1391 #undef TARGET_STRUCT_VALUE_RTX
1392 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1393 #undef TARGET_SETUP_INCOMING_VARARGS
1394 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1395 #undef TARGET_MUST_PASS_IN_STACK
1396 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1397 #undef TARGET_PASS_BY_REFERENCE
1398 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1399 #undef TARGET_INTERNAL_ARG_POINTER
1400 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1401 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1402 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1404 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1405 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1407 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1408 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1410 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1411 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1413 #ifdef HAVE_AS_TLS
1414 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1415 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1416 #endif
1418 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1419 #undef TARGET_INSERT_ATTRIBUTES
1420 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1421 #endif
1423 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1424 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1426 #undef TARGET_STACK_PROTECT_FAIL
1427 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1429 #undef TARGET_FUNCTION_VALUE
1430 #define TARGET_FUNCTION_VALUE ix86_function_value
1432 struct gcc_target targetm = TARGET_INITIALIZER;
1435 /* The svr4 ABI for the i386 says that records and unions are returned
1436 in memory. */
1437 #ifndef DEFAULT_PCC_STRUCT_RETURN
1438 #define DEFAULT_PCC_STRUCT_RETURN 1
1439 #endif
1441 /* Implement TARGET_HANDLE_OPTION. */
1443 static bool
1444 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1446 switch (code)
1448 case OPT_m3dnow:
1449 if (!value)
1451 target_flags &= ~MASK_3DNOW_A;
1452 target_flags_explicit |= MASK_3DNOW_A;
1454 return true;
1456 case OPT_mmmx:
1457 if (!value)
1459 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1460 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1462 return true;
1464 case OPT_msse:
1465 if (!value)
1467 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1468 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1470 return true;
1472 case OPT_msse2:
1473 if (!value)
1475 target_flags &= ~MASK_SSE3;
1476 target_flags_explicit |= MASK_SSE3;
1478 return true;
1480 default:
1481 return true;
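/* Illustration of the cascade above (a rough sketch, not an exhaustive
   description of the option machinery): because -mno-sse also clears
   MASK_SSE2 and MASK_SSE3 and marks them explicit, a hypothetical command
   line such as
       gcc -march=pentium4 -mno-sse ...
   should end up with neither SSE nor SSE2 enabled, even though the
   pentium4 entry in processor_alias_table would otherwise turn them on
   in override_options below (MMX is still enabled by the table).  */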
1485 /* Sometimes certain combinations of command options do not make
1486 sense on a particular target machine. You can define a macro
1487 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1488 defined, is executed once just after all the command options have
1489 been parsed.
1491 Don't use this macro to turn on various extra optimizations for
1492 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1494 void
1495 override_options (void)
1497 int i;
1498 int ix86_tune_defaulted = 0;
1500 /* Comes from final.c -- no real reason to change it. */
1501 #define MAX_CODE_ALIGN 16
1503 static struct ptt
1505 const struct processor_costs *cost; /* Processor costs */
1506 const int target_enable; /* Target flags to enable. */
1507 const int target_disable; /* Target flags to disable. */
1508 const int align_loop; /* Default alignments. */
1509 const int align_loop_max_skip;
1510 const int align_jump;
1511 const int align_jump_max_skip;
1512 const int align_func;
1514 const processor_target_table[PROCESSOR_max] =
1516 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1517 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1518 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1520 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1521 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1522 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1523 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1524 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1525 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1526 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1527 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1530 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1531 static struct pta
1533 const char *const name; /* processor name or nickname. */
1534 const enum processor_type processor;
1535 const enum pta_flags
1537 PTA_SSE = 1,
1538 PTA_SSE2 = 2,
1539 PTA_SSE3 = 4,
1540 PTA_MMX = 8,
1541 PTA_PREFETCH_SSE = 16,
1542 PTA_3DNOW = 32,
1543 PTA_3DNOW_A = 64,
1544 PTA_64BIT = 128,
1545 PTA_SSSE3 = 256
1546 } flags;
1548 const processor_alias_table[] =
1550 {"i386", PROCESSOR_I386, 0},
1551 {"i486", PROCESSOR_I486, 0},
1552 {"i586", PROCESSOR_PENTIUM, 0},
1553 {"pentium", PROCESSOR_PENTIUM, 0},
1554 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1555 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1556 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1557 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1558 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1559 {"i686", PROCESSOR_PENTIUMPRO, 0},
1560 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1561 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1562 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1563 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1564 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1565 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1566 | PTA_MMX | PTA_PREFETCH_SSE},
1567 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1568 | PTA_MMX | PTA_PREFETCH_SSE},
1569 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1570 | PTA_MMX | PTA_PREFETCH_SSE},
1571 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1572 | PTA_MMX | PTA_PREFETCH_SSE},
1573 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1574 | PTA_3DNOW_A},
1575 {"k6", PROCESSOR_K6, PTA_MMX},
1576 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1577 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1578 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1579 | PTA_3DNOW_A},
1580 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1581 | PTA_3DNOW | PTA_3DNOW_A},
1582 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1583 | PTA_3DNOW_A | PTA_SSE},
1584 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1585 | PTA_3DNOW_A | PTA_SSE},
1586 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1587 | PTA_3DNOW_A | PTA_SSE},
1588 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1589 | PTA_SSE | PTA_SSE2 },
1590 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1591 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1592 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1593 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1594 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1595 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1596 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1597 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1598 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1599 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1602 int const pta_size = ARRAY_SIZE (processor_alias_table);
1604 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1605 SUBTARGET_OVERRIDE_OPTIONS;
1606 #endif
1608 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1609 SUBSUBTARGET_OVERRIDE_OPTIONS;
1610 #endif
1612 /* -fPIC is the default for x86_64 Darwin (Mach-O). */
1613 if (TARGET_MACHO && TARGET_64BIT)
1614 flag_pic = 2;
1616 /* Set the default values for switches whose default depends on TARGET_64BIT
1617 in case they weren't overwritten by command line options. */
1618 if (TARGET_64BIT)
1620 /* Mach-O doesn't support omitting the frame pointer for now. */
1621 if (flag_omit_frame_pointer == 2)
1622 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1623 if (flag_asynchronous_unwind_tables == 2)
1624 flag_asynchronous_unwind_tables = 1;
1625 if (flag_pcc_struct_return == 2)
1626 flag_pcc_struct_return = 0;
1628 else
1630 if (flag_omit_frame_pointer == 2)
1631 flag_omit_frame_pointer = 0;
1632 if (flag_asynchronous_unwind_tables == 2)
1633 flag_asynchronous_unwind_tables = 0;
1634 if (flag_pcc_struct_return == 2)
1635 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
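/* Rough example of the "2 means not specified" convention used above:
   optimization_options, later in this file, seeds flag_omit_frame_pointer,
   flag_asynchronous_unwind_tables and flag_pcc_struct_return with the
   otherwise-impossible value 2 when the user gave no explicit option, and
   the code above resolves any remaining 2 to the TARGET_64BIT-dependent
   default -- e.g. a plain -O2 compile on a non-Darwin 64-bit target ends
   up with flag_omit_frame_pointer == 1, while 32-bit defaults to 0.  */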
1638 /* Need to check -mtune=generic first. */
1639 if (ix86_tune_string)
1641 if (!strcmp (ix86_tune_string, "generic")
1642 || !strcmp (ix86_tune_string, "i686")
1643 /* As special support for cross compilers we read -mtune=native
1644 as -mtune=generic. With native compilers we won't see -mtune=native,
1645 as it will already have been rewritten by the driver. */
1646 || !strcmp (ix86_tune_string, "native"))
1648 if (TARGET_64BIT)
1649 ix86_tune_string = "generic64";
1650 else
1651 ix86_tune_string = "generic32";
1653 else if (!strncmp (ix86_tune_string, "generic", 7))
1654 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1656 else
1658 if (ix86_arch_string)
1659 ix86_tune_string = ix86_arch_string;
1660 if (!ix86_tune_string)
1662 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1663 ix86_tune_defaulted = 1;
1666 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1667 need to use a sensible tune option. */
1668 if (!strcmp (ix86_tune_string, "generic")
1669 || !strcmp (ix86_tune_string, "x86-64")
1670 || !strcmp (ix86_tune_string, "i686"))
1672 if (TARGET_64BIT)
1673 ix86_tune_string = "generic64";
1674 else
1675 ix86_tune_string = "generic32";
1678 if (!strcmp (ix86_tune_string, "x86-64"))
1679 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1680 "-mtune=generic instead as appropriate.");
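/* Summary of the tune selection above (approximate): with an explicit
   -march but no -mtune, e.g. a hypothetical "-march=k8", ix86_tune_string
   is simply copied from ix86_arch_string, so tuning follows the
   architecture; with neither option the configured TARGET_CPU_DEFAULT name
   is used, and "generic", "i686" and "x86-64" are rewritten to
   generic32/generic64 as appropriate for the selected word size.  */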
1682 if (!ix86_arch_string)
1683 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1684 if (!strcmp (ix86_arch_string, "generic"))
1685 error ("generic CPU can be used only for -mtune= switch");
1686 if (!strncmp (ix86_arch_string, "generic", 7))
1687 error ("bad value (%s) for -march= switch", ix86_arch_string);
1689 if (ix86_cmodel_string != 0)
1691 if (!strcmp (ix86_cmodel_string, "small"))
1692 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1693 else if (!strcmp (ix86_cmodel_string, "medium"))
1694 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1695 else if (flag_pic)
1696 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1697 else if (!strcmp (ix86_cmodel_string, "32"))
1698 ix86_cmodel = CM_32;
1699 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1700 ix86_cmodel = CM_KERNEL;
1701 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1702 ix86_cmodel = CM_LARGE;
1703 else
1704 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1706 else
1708 ix86_cmodel = CM_32;
1709 if (TARGET_64BIT)
1710 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1712 if (ix86_asm_string != 0)
1714 if (! TARGET_MACHO
1715 && !strcmp (ix86_asm_string, "intel"))
1716 ix86_asm_dialect = ASM_INTEL;
1717 else if (!strcmp (ix86_asm_string, "att"))
1718 ix86_asm_dialect = ASM_ATT;
1719 else
1720 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1722 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1723 error ("code model %qs not supported in the %s bit mode",
1724 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1725 if (ix86_cmodel == CM_LARGE)
1726 sorry ("code model %<large%> not supported yet");
1727 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1728 sorry ("%i-bit mode not compiled in",
1729 (target_flags & MASK_64BIT) ? 64 : 32);
1731 for (i = 0; i < pta_size; i++)
1732 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1734 ix86_arch = processor_alias_table[i].processor;
1735 /* Default cpu tuning to the architecture. */
1736 ix86_tune = ix86_arch;
1737 if (processor_alias_table[i].flags & PTA_MMX
1738 && !(target_flags_explicit & MASK_MMX))
1739 target_flags |= MASK_MMX;
1740 if (processor_alias_table[i].flags & PTA_3DNOW
1741 && !(target_flags_explicit & MASK_3DNOW))
1742 target_flags |= MASK_3DNOW;
1743 if (processor_alias_table[i].flags & PTA_3DNOW_A
1744 && !(target_flags_explicit & MASK_3DNOW_A))
1745 target_flags |= MASK_3DNOW_A;
1746 if (processor_alias_table[i].flags & PTA_SSE
1747 && !(target_flags_explicit & MASK_SSE))
1748 target_flags |= MASK_SSE;
1749 if (processor_alias_table[i].flags & PTA_SSE2
1750 && !(target_flags_explicit & MASK_SSE2))
1751 target_flags |= MASK_SSE2;
1752 if (processor_alias_table[i].flags & PTA_SSE3
1753 && !(target_flags_explicit & MASK_SSE3))
1754 target_flags |= MASK_SSE3;
1755 if (processor_alias_table[i].flags & PTA_SSSE3
1756 && !(target_flags_explicit & MASK_SSSE3))
1757 target_flags |= MASK_SSSE3;
1758 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1759 x86_prefetch_sse = true;
1760 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1761 error ("CPU you selected does not support x86-64 "
1762 "instruction set");
1763 break;
1766 if (i == pta_size)
1767 error ("bad value (%s) for -march= switch", ix86_arch_string);
1769 for (i = 0; i < pta_size; i++)
1770 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1772 ix86_tune = processor_alias_table[i].processor;
1773 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1775 if (ix86_tune_defaulted)
1777 ix86_tune_string = "x86-64";
1778 for (i = 0; i < pta_size; i++)
1779 if (! strcmp (ix86_tune_string,
1780 processor_alias_table[i].name))
1781 break;
1782 ix86_tune = processor_alias_table[i].processor;
1784 else
1785 error ("CPU you selected does not support x86-64 "
1786 "instruction set");
1788 /* Intel CPUs have always interpreted SSE prefetch instructions as
1789 NOPs; so, we can enable SSE prefetch instructions even when
1790 -mtune (rather than -march) points us to a processor that has them.
1791 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1792 higher processors. */
1793 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1794 x86_prefetch_sse = true;
1795 break;
1797 if (i == pta_size)
1798 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1800 if (optimize_size)
1801 ix86_cost = &size_cost;
1802 else
1803 ix86_cost = processor_target_table[ix86_tune].cost;
1804 target_flags |= processor_target_table[ix86_tune].target_enable;
1805 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1807 /* Arrange to set up i386_stack_locals for all functions. */
1808 init_machine_status = ix86_init_machine_status;
1810 /* Validate -mregparm= value. */
1811 if (ix86_regparm_string)
1813 i = atoi (ix86_regparm_string);
1814 if (i < 0 || i > REGPARM_MAX)
1815 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1816 else
1817 ix86_regparm = i;
1819 else
1820 if (TARGET_64BIT)
1821 ix86_regparm = REGPARM_MAX;
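/* Illustrative effect of -mregparm (a sketch, not the full calling
   convention logic): with -mregparm=3 on ia32, up to three integer
   arguments are passed in registers (conventionally EAX, EDX and ECX)
   instead of on the stack; in 64-bit mode register passing is part of the
   ABI, hence ix86_regparm is simply set to REGPARM_MAX above.  */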
1823 /* If the user has provided any of the -malign-* options,
1824 warn and use that value only if -falign-* is not set.
1825 Remove this code in GCC 3.2 or later. */
1826 if (ix86_align_loops_string)
1828 warning (0, "-malign-loops is obsolete, use -falign-loops");
1829 if (align_loops == 0)
1831 i = atoi (ix86_align_loops_string);
1832 if (i < 0 || i > MAX_CODE_ALIGN)
1833 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1834 else
1835 align_loops = 1 << i;
1839 if (ix86_align_jumps_string)
1841 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1842 if (align_jumps == 0)
1844 i = atoi (ix86_align_jumps_string);
1845 if (i < 0 || i > MAX_CODE_ALIGN)
1846 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1847 else
1848 align_jumps = 1 << i;
1852 if (ix86_align_funcs_string)
1854 warning (0, "-malign-functions is obsolete, use -falign-functions");
1855 if (align_functions == 0)
1857 i = atoi (ix86_align_funcs_string);
1858 if (i < 0 || i > MAX_CODE_ALIGN)
1859 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1860 else
1861 align_functions = 1 << i;
1865 /* Default align_* from the processor table. */
1866 if (align_loops == 0)
1868 align_loops = processor_target_table[ix86_tune].align_loop;
1869 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1871 if (align_jumps == 0)
1873 align_jumps = processor_target_table[ix86_tune].align_jump;
1874 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1876 if (align_functions == 0)
1878 align_functions = processor_target_table[ix86_tune].align_func;
1881 /* Validate -mbranch-cost= value, or provide default. */
1882 ix86_branch_cost = ix86_cost->branch_cost;
1883 if (ix86_branch_cost_string)
1885 i = atoi (ix86_branch_cost_string);
1886 if (i < 0 || i > 5)
1887 error ("-mbranch-cost=%d is not between 0 and 5", i);
1888 else
1889 ix86_branch_cost = i;
1891 if (ix86_section_threshold_string)
1893 i = atoi (ix86_section_threshold_string);
1894 if (i < 0)
1895 error ("-mlarge-data-threshold=%d is negative", i);
1896 else
1897 ix86_section_threshold = i;
1900 if (ix86_tls_dialect_string)
1902 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1903 ix86_tls_dialect = TLS_DIALECT_GNU;
1904 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1905 ix86_tls_dialect = TLS_DIALECT_GNU2;
1906 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1907 ix86_tls_dialect = TLS_DIALECT_SUN;
1908 else
1909 error ("bad value (%s) for -mtls-dialect= switch",
1910 ix86_tls_dialect_string);
1913 /* Keep nonleaf frame pointers. */
1914 if (flag_omit_frame_pointer)
1915 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1916 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1917 flag_omit_frame_pointer = 1;
1919 /* If we're doing fast math, we don't care about comparison order
1920 wrt NaNs. This lets us use a shorter comparison sequence. */
1921 if (flag_finite_math_only)
1922 target_flags &= ~MASK_IEEE_FP;
1924 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1925 since the insns won't need emulation. */
1926 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1927 target_flags &= ~MASK_NO_FANCY_MATH_387;
1929 /* Likewise, if the target doesn't have a 387, or we've specified
1930 software floating point, don't use 387 inline intrinsics. */
1931 if (!TARGET_80387)
1932 target_flags |= MASK_NO_FANCY_MATH_387;
1934 /* Turn on SSE3 builtins for -mssse3. */
1935 if (TARGET_SSSE3)
1936 target_flags |= MASK_SSE3;
1938 /* Turn on SSE2 builtins for -msse3. */
1939 if (TARGET_SSE3)
1940 target_flags |= MASK_SSE2;
1942 /* Turn on SSE builtins for -msse2. */
1943 if (TARGET_SSE2)
1944 target_flags |= MASK_SSE;
1946 /* Turn on MMX builtins for -msse. */
1947 if (TARGET_SSE)
1949 target_flags |= MASK_MMX & ~target_flags_explicit;
1950 x86_prefetch_sse = true;
1953 /* Turn on MMX builtins for 3Dnow. */
1954 if (TARGET_3DNOW)
1955 target_flags |= MASK_MMX;
1957 if (TARGET_64BIT)
1959 if (TARGET_ALIGN_DOUBLE)
1960 error ("-malign-double makes no sense in the 64bit mode");
1961 if (TARGET_RTD)
1962 error ("-mrtd calling convention not supported in the 64bit mode");
1964 /* Enable by default the SSE and MMX builtins. Do allow the user to
1965 explicitly disable any of these. In particular, disabling SSE and
1966 MMX for kernel code is extremely useful. */
1967 target_flags
1968 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1969 & ~target_flags_explicit);
1971 else
1973 /* The i386 ABI does not specify a red zone. It still makes sense to use
1974 it when the programmer takes care to keep the stack from being destroyed. */
1975 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1976 target_flags |= MASK_NO_RED_ZONE;
1979 /* Validate -mpreferred-stack-boundary= value, or provide default.
1980 The default of 128 bits is for Pentium III's SSE __m128. We can't
1981 change it because of optimize_size. Otherwise, we can't mix object
1982 files compiled with -Os and -On. */
1983 ix86_preferred_stack_boundary = 128;
1984 if (ix86_preferred_stack_boundary_string)
1986 i = atoi (ix86_preferred_stack_boundary_string);
1987 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1988 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1989 TARGET_64BIT ? 4 : 2);
1990 else
1991 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
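/* Example of the computation above: -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment,
   which is also the built-in default chosen for SSE __m128 above.  */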
1994 /* Accept -msseregparm only if at least SSE support is enabled. */
1995 if (TARGET_SSEREGPARM
1996 && ! TARGET_SSE)
1997 error ("-msseregparm used without SSE enabled");
1999 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2001 if (ix86_fpmath_string != 0)
2003 if (! strcmp (ix86_fpmath_string, "387"))
2004 ix86_fpmath = FPMATH_387;
2005 else if (! strcmp (ix86_fpmath_string, "sse"))
2007 if (!TARGET_SSE)
2009 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2010 ix86_fpmath = FPMATH_387;
2012 else
2013 ix86_fpmath = FPMATH_SSE;
2015 else if (! strcmp (ix86_fpmath_string, "387,sse")
2016 || ! strcmp (ix86_fpmath_string, "sse,387"))
2018 if (!TARGET_SSE)
2020 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2021 ix86_fpmath = FPMATH_387;
2023 else if (!TARGET_80387)
2025 warning (0, "387 instruction set disabled, using SSE arithmetics");
2026 ix86_fpmath = FPMATH_SSE;
2028 else
2029 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2031 else
2032 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
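/* Rough summary of the -mfpmath handling above: "-mfpmath=sse" selects
   scalar floating point in SSE registers but falls back to the 387 (with
   a warning) when SSE itself is disabled, while "-mfpmath=sse,387" lets
   the register allocator use both register files when both instruction
   sets are available.  */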
2035 /* If the i387 is disabled, then do not return values in it. */
2036 if (!TARGET_80387)
2037 target_flags &= ~MASK_FLOAT_RETURNS;
2039 if ((x86_accumulate_outgoing_args & TUNEMASK)
2040 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2041 && !optimize_size)
2042 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2044 /* ??? Unwind info is not correct around the CFG unless either a frame
2045 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2046 unwind info generation to be aware of the CFG and propagating states
2047 around edges. */
2048 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2049 || flag_exceptions || flag_non_call_exceptions)
2050 && flag_omit_frame_pointer
2051 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2053 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2054 warning (0, "unwind tables currently require either a frame pointer "
2055 "or -maccumulate-outgoing-args for correctness");
2056 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2059 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2061 char *p;
2062 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2063 p = strchr (internal_label_prefix, 'X');
2064 internal_label_prefix_len = p - internal_label_prefix;
2065 *p = '\0';
2068 /* When the scheduling description is not available, disable the scheduler
2069 pass so it won't slow down compilation and make x87 code slower. */
2070 if (!TARGET_SCHEDULE)
2071 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2073 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2074 set_param_value ("simultaneous-prefetches",
2075 ix86_cost->simultaneous_prefetches);
2076 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2077 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2080 /* Switch to the appropriate section for output of DECL.
2081 DECL is either a `VAR_DECL' node or a constant of some sort.
2082 RELOC indicates whether forming the initial value of DECL requires
2083 link-time relocations. */
2085 static section *
2086 x86_64_elf_select_section (tree decl, int reloc,
2087 unsigned HOST_WIDE_INT align)
2089 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2090 && ix86_in_large_data_p (decl))
2092 const char *sname = NULL;
2093 unsigned int flags = SECTION_WRITE;
2094 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2096 case SECCAT_DATA:
2097 sname = ".ldata";
2098 break;
2099 case SECCAT_DATA_REL:
2100 sname = ".ldata.rel";
2101 break;
2102 case SECCAT_DATA_REL_LOCAL:
2103 sname = ".ldata.rel.local";
2104 break;
2105 case SECCAT_DATA_REL_RO:
2106 sname = ".ldata.rel.ro";
2107 break;
2108 case SECCAT_DATA_REL_RO_LOCAL:
2109 sname = ".ldata.rel.ro.local";
2110 break;
2111 case SECCAT_BSS:
2112 sname = ".lbss";
2113 flags |= SECTION_BSS;
2114 break;
2115 case SECCAT_RODATA:
2116 case SECCAT_RODATA_MERGE_STR:
2117 case SECCAT_RODATA_MERGE_STR_INIT:
2118 case SECCAT_RODATA_MERGE_CONST:
2119 sname = ".lrodata";
2120 flags = 0;
2121 break;
2122 case SECCAT_SRODATA:
2123 case SECCAT_SDATA:
2124 case SECCAT_SBSS:
2125 gcc_unreachable ();
2126 case SECCAT_TEXT:
2127 case SECCAT_TDATA:
2128 case SECCAT_TBSS:
2129 /* We don't split these for medium model. Place them into
2130 default sections and hope for the best. */
2131 break;
2133 if (sname)
2135 /* We might get called with string constants, but get_named_section
2136 doesn't like them as they are not DECLs. Also, we need to set
2137 flags in that case. */
2138 if (!DECL_P (decl))
2139 return get_section (sname, flags, NULL);
2140 return get_named_section (decl, sname, reloc);
2143 return default_elf_select_section (decl, reloc, align);
2146 /* Build up a unique section name, expressed as a
2147 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2148 RELOC indicates whether the initial value of EXP requires
2149 link-time relocations. */
2151 static void
2152 x86_64_elf_unique_section (tree decl, int reloc)
2154 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2155 && ix86_in_large_data_p (decl))
2157 const char *prefix = NULL;
2158 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2159 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2161 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2163 case SECCAT_DATA:
2164 case SECCAT_DATA_REL:
2165 case SECCAT_DATA_REL_LOCAL:
2166 case SECCAT_DATA_REL_RO:
2167 case SECCAT_DATA_REL_RO_LOCAL:
2168 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2169 break;
2170 case SECCAT_BSS:
2171 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2172 break;
2173 case SECCAT_RODATA:
2174 case SECCAT_RODATA_MERGE_STR:
2175 case SECCAT_RODATA_MERGE_STR_INIT:
2176 case SECCAT_RODATA_MERGE_CONST:
2177 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2178 break;
2179 case SECCAT_SRODATA:
2180 case SECCAT_SDATA:
2181 case SECCAT_SBSS:
2182 gcc_unreachable ();
2183 case SECCAT_TEXT:
2184 case SECCAT_TDATA:
2185 case SECCAT_TBSS:
2186 /* We don't split these for medium model. Place them into
2187 default sections and hope for the best. */
2188 break;
2190 if (prefix)
2192 const char *name;
2193 size_t nlen, plen;
2194 char *string;
2195 plen = strlen (prefix);
2197 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2198 name = targetm.strip_name_encoding (name);
2199 nlen = strlen (name);
2201 string = alloca (nlen + plen + 1);
2202 memcpy (string, prefix, plen);
2203 memcpy (string + plen, name, nlen + 1);
2205 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2206 return;
2209 default_unique_section (decl, reloc);
2212 #ifdef COMMON_ASM_OP
2213 /* This says how to output assembler code to declare an
2214 uninitialized external linkage data object.
2216 For medium model x86-64 we need to use the .largecomm directive for
2217 large objects. */
2218 void
2219 x86_elf_aligned_common (FILE *file,
2220 const char *name, unsigned HOST_WIDE_INT size,
2221 int align)
2223 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2224 && size > (unsigned int)ix86_section_threshold)
2225 fprintf (file, ".largecomm\t");
2226 else
2227 fprintf (file, "%s", COMMON_ASM_OP);
2228 assemble_name (file, name);
2229 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2230 size, align / BITS_PER_UNIT);
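/* Hypothetical output of the routine above (names and numbers are only
   illustrative): for a 128 KB common symbol "buf" compiled with
   -mcmodel=medium and the default -mlarge-data-threshold, something along
   the lines of
       .largecomm  buf,131072,32
   is emitted (the last operand is the alignment in bytes), while small
   objects keep using the ordinary COMMON_ASM_OP directive.  */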
2233 /* Utility function for targets to use in implementing
2234 ASM_OUTPUT_ALIGNED_BSS. */
2236 void
2237 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2238 const char *name, unsigned HOST_WIDE_INT size,
2239 int align)
2241 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2242 && size > (unsigned int)ix86_section_threshold)
2243 switch_to_section (get_named_section (decl, ".lbss", 0));
2244 else
2245 switch_to_section (bss_section);
2246 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2247 #ifdef ASM_DECLARE_OBJECT_NAME
2248 last_assemble_variable_decl = decl;
2249 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2250 #else
2251 /* The standard thing is to just output a label for the object. */
2252 ASM_OUTPUT_LABEL (file, name);
2253 #endif /* ASM_DECLARE_OBJECT_NAME */
2254 ASM_OUTPUT_SKIP (file, size ? size : 1);
2256 #endif
2258 void
2259 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2261 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2262 make the problem with not enough registers even worse. */
2263 #ifdef INSN_SCHEDULING
2264 if (level > 1)
2265 flag_schedule_insns = 0;
2266 #endif
2268 if (TARGET_MACHO)
2269 /* The Darwin libraries never set errno, so we might as well
2270 avoid calling them when that's the only reason we would. */
2271 flag_errno_math = 0;
2273 /* The default values of these switches depend on TARGET_64BIT,
2274 which is not known at this moment. Mark these values with 2 and
2275 let the user override them. In case there is no command line option
2276 specifying them, we will set the defaults in override_options. */
2277 if (optimize >= 1)
2278 flag_omit_frame_pointer = 2;
2279 flag_pcc_struct_return = 2;
2280 flag_asynchronous_unwind_tables = 2;
2281 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2282 SUBTARGET_OPTIMIZATION_OPTIONS;
2283 #endif
2286 /* Table of valid machine attributes. */
2287 const struct attribute_spec ix86_attribute_table[] =
2289 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2290 /* Stdcall attribute says callee is responsible for popping arguments
2291 if they are not variable. */
2292 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2293 /* Fastcall attribute says callee is responsible for popping arguments
2294 if they are not variable. */
2295 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2296 /* Cdecl attribute says the callee is a normal C declaration */
2297 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2298 /* Regparm attribute specifies how many integer arguments are to be
2299 passed in registers. */
2300 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2301 /* Sseregparm attribute says we are using x86_64 calling conventions
2302 for FP arguments. */
2303 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2304 /* force_align_arg_pointer says this function realigns the stack at entry. */
2305 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2306 false, true, true, ix86_handle_cconv_attribute },
2307 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2308 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2309 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2310 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2311 #endif
2312 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2313 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2314 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2315 SUBTARGET_ATTRIBUTE_TABLE,
2316 #endif
2317 { NULL, 0, 0, false, false, false, NULL }
2320 /* Decide whether we can make a sibling call to a function. DECL is the
2321 declaration of the function being targeted by the call and EXP is the
2322 CALL_EXPR representing the call. */
2324 static bool
2325 ix86_function_ok_for_sibcall (tree decl, tree exp)
2327 tree func;
2328 rtx a, b;
2330 /* If we are generating position-independent code, we cannot sibcall
2331 optimize any indirect call, or a direct call to a global function,
2332 as the PLT requires %ebx be live. */
2333 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2334 return false;
2336 if (decl)
2337 func = decl;
2338 else
2340 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2341 if (POINTER_TYPE_P (func))
2342 func = TREE_TYPE (func);
2345 /* Check that the return value locations are the same. Like
2346 if we are returning floats on the 80387 register stack, we cannot
2347 make a sibcall from a function that doesn't return a float to a
2348 function that does or, conversely, from a function that does return
2349 a float to a function that doesn't; the necessary stack adjustment
2350 would not be executed. This is also the place we notice
2351 differences in the return value ABI. Note that it is ok for one
2352 of the functions to have void return type as long as the return
2353 value of the other is passed in a register. */
2354 a = ix86_function_value (TREE_TYPE (exp), func, false);
2355 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2356 cfun->decl, false);
2357 if (STACK_REG_P (a) || STACK_REG_P (b))
2359 if (!rtx_equal_p (a, b))
2360 return false;
2362 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2364 else if (!rtx_equal_p (a, b))
2365 return false;
2367 /* If this call is indirect, we'll need to be able to use a call-clobbered
2368 register for the address of the target function. Make sure that all
2369 such registers are not used for passing parameters. */
2370 if (!decl && !TARGET_64BIT)
2372 tree type;
2374 /* We're looking at the CALL_EXPR, we need the type of the function. */
2375 type = TREE_OPERAND (exp, 0); /* pointer expression */
2376 type = TREE_TYPE (type); /* pointer type */
2377 type = TREE_TYPE (type); /* function type */
2379 if (ix86_function_regparm (type, NULL) >= 3)
2381 /* ??? Need to count the actual number of registers to be used,
2382 not the possible number of registers. Fix later. */
2383 return false;
2387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2388 /* Dllimport'd functions are also called indirectly. */
2389 if (decl && DECL_DLLIMPORT_P (decl)
2390 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2391 return false;
2392 #endif
2394 /* If we force-aligned the stack, then sibcalling would misalign the
2395 stack, which may break the called function. */
2396 if (cfun->machine->force_align_arg_pointer)
2397 return false;
2399 /* Otherwise okay. That also includes certain types of indirect calls. */
2400 return true;
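/* Worked example of the test above (approximate; foo and bar are
   hypothetical): when compiling 32-bit PIC code, a tail call such as
       extern int bar (int);
       int foo (int x) { return bar (x); }
   is not turned into a sibcall, because calling bar through the PLT
   requires %ebx to be live; a static or otherwise locally-bound bar can
   still be sibcalled.  */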
2403 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2404 calling convention attributes;
2405 arguments as in struct attribute_spec.handler. */
2407 static tree
2408 ix86_handle_cconv_attribute (tree *node, tree name,
2409 tree args,
2410 int flags ATTRIBUTE_UNUSED,
2411 bool *no_add_attrs)
2413 if (TREE_CODE (*node) != FUNCTION_TYPE
2414 && TREE_CODE (*node) != METHOD_TYPE
2415 && TREE_CODE (*node) != FIELD_DECL
2416 && TREE_CODE (*node) != TYPE_DECL)
2418 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2419 IDENTIFIER_POINTER (name));
2420 *no_add_attrs = true;
2421 return NULL_TREE;
2424 /* Can combine regparm with all attributes but fastcall. */
2425 if (is_attribute_p ("regparm", name))
2427 tree cst;
2429 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2431 error ("fastcall and regparm attributes are not compatible");
2434 cst = TREE_VALUE (args);
2435 if (TREE_CODE (cst) != INTEGER_CST)
2437 warning (OPT_Wattributes,
2438 "%qs attribute requires an integer constant argument",
2439 IDENTIFIER_POINTER (name));
2440 *no_add_attrs = true;
2442 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2444 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2445 IDENTIFIER_POINTER (name), REGPARM_MAX);
2446 *no_add_attrs = true;
2449 if (!TARGET_64BIT
2450 && lookup_attribute (ix86_force_align_arg_pointer_string,
2451 TYPE_ATTRIBUTES (*node))
2452 && compare_tree_int (cst, REGPARM_MAX-1))
2454 error ("%s functions limited to %d register parameters",
2455 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2458 return NULL_TREE;
2461 if (TARGET_64BIT)
2463 warning (OPT_Wattributes, "%qs attribute ignored",
2464 IDENTIFIER_POINTER (name));
2465 *no_add_attrs = true;
2466 return NULL_TREE;
2469 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2470 if (is_attribute_p ("fastcall", name))
2472 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2474 error ("fastcall and cdecl attributes are not compatible");
2476 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2478 error ("fastcall and stdcall attributes are not compatible");
2480 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2482 error ("fastcall and regparm attributes are not compatible");
2486 /* Can combine stdcall with fastcall (redundant), regparm and
2487 sseregparm. */
2488 else if (is_attribute_p ("stdcall", name))
2490 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2492 error ("stdcall and cdecl attributes are not compatible");
2494 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2496 error ("stdcall and fastcall attributes are not compatible");
2500 /* Can combine cdecl with regparm and sseregparm. */
2501 else if (is_attribute_p ("cdecl", name))
2503 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2505 error ("stdcall and cdecl attributes are not compatible");
2507 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2509 error ("fastcall and cdecl attributes are not compatible");
2513 /* Can combine sseregparm with all attributes. */
2515 return NULL_TREE;
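/* Usage sketch for the attributes handled above (user code, not part of
   the compiler; the function name is hypothetical):
       int __attribute__ ((fastcall)) f (int a, int b, int c);
   passes the first two integer arguments in registers (ECX and EDX) with
   the rest on the stack, and combining fastcall with regparm or cdecl is
   diagnosed by the checks above.  */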
2518 /* Return 0 if the attributes for two types are incompatible, 1 if they
2519 are compatible, and 2 if they are nearly compatible (which causes a
2520 warning to be generated). */
2522 static int
2523 ix86_comp_type_attributes (tree type1, tree type2)
2525 /* Check for mismatch of non-default calling convention. */
2526 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2528 if (TREE_CODE (type1) != FUNCTION_TYPE)
2529 return 1;
2531 /* Check for mismatched fastcall/regparm types. */
2532 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2533 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2534 || (ix86_function_regparm (type1, NULL)
2535 != ix86_function_regparm (type2, NULL)))
2536 return 0;
2538 /* Check for mismatched sseregparm types. */
2539 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2540 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2541 return 0;
2543 /* Check for mismatched return types (cdecl vs stdcall). */
2544 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2545 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2546 return 0;
2548 return 1;
2551 /* Return the regparm value for a function with the indicated TYPE and DECL.
2552 DECL may be NULL when calling function indirectly
2553 or considering a libcall. */
2555 static int
2556 ix86_function_regparm (tree type, tree decl)
2558 tree attr;
2559 int regparm = ix86_regparm;
2560 bool user_convention = false;
2562 if (!TARGET_64BIT)
2564 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2565 if (attr)
2567 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2568 user_convention = true;
2571 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2573 regparm = 2;
2574 user_convention = true;
2577 /* Use register calling convention for local functions when possible. */
2578 if (!TARGET_64BIT && !user_convention && decl
2579 && flag_unit_at_a_time && !profile_flag)
2581 struct cgraph_local_info *i = cgraph_local_info (decl);
2582 if (i && i->local)
2584 int local_regparm, globals = 0, regno;
2586 /* Make sure no regparm register is taken by a global register
2587 variable. */
2588 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2589 if (global_regs[local_regparm])
2590 break;
2591 /* We can't use regparm(3) for nested functions as these use
2592 static chain pointer in third argument. */
2593 if (local_regparm == 3
2594 && decl_function_context (decl)
2595 && !DECL_NO_STATIC_CHAIN (decl))
2596 local_regparm = 2;
2597 /* If the function realigns its stack pointer, the
2598 prologue will clobber %ecx. If we've already
2599 generated code for the callee, the callee
2600 DECL_STRUCT_FUNCTION is gone, so we fall back to
2601 scanning the attributes for the self-realigning
2602 property. */
2603 if ((DECL_STRUCT_FUNCTION (decl)
2604 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2605 || (!DECL_STRUCT_FUNCTION (decl)
2606 && lookup_attribute (ix86_force_align_arg_pointer_string,
2607 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2608 local_regparm = 2;
2609 /* Each global register variable increases register pressure,
2610 so the more global register variables there are, the less useful
2611 the regparm optimization becomes, unless the user requests it explicitly. */
2612 for (regno = 0; regno < 6; regno++)
2613 if (global_regs[regno])
2614 globals++;
2615 local_regparm
2616 = globals < local_regparm ? local_regparm - globals : 0;
2618 if (local_regparm > regparm)
2619 regparm = local_regparm;
2623 return regparm;
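/* Example of the local-function optimization above (a rough sketch): a
   static function whose address is never taken, compiled with
   -O2 -funit-at-a-time and without profiling, is given an implicit
   regparm(3), reduced by one slot per global register variable and when a
   static chain or stack realignment needs %ecx, all without any
   source-level attribute.  */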
2626 /* Return 1 or 2 if we can pass up to 8 SFmode (1) or SFmode and DFmode (2)
2627 arguments in SSE registers for a function with the indicated TYPE and DECL.
2628 DECL may be NULL when calling the function indirectly
2629 or considering a libcall. Otherwise return 0. */
2631 static int
2632 ix86_function_sseregparm (tree type, tree decl)
2634 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2635 by the sseregparm attribute. */
2636 if (TARGET_SSEREGPARM
2637 || (type
2638 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2640 if (!TARGET_SSE)
2642 if (decl)
2643 error ("Calling %qD with attribute sseregparm without "
2644 "SSE/SSE2 enabled", decl);
2645 else
2646 error ("Calling %qT with attribute sseregparm without "
2647 "SSE/SSE2 enabled", type);
2648 return 0;
2651 return 2;
2654 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2655 in SSE registers even for 32-bit mode and not just 3, but up to
2656 8 SSE arguments in registers. */
2657 if (!TARGET_64BIT && decl
2658 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2660 struct cgraph_local_info *i = cgraph_local_info (decl);
2661 if (i && i->local)
2662 return TARGET_SSE2 ? 2 : 1;
2665 return 0;
2668 /* Return true if EAX is live at the start of the function. Used by
2669 ix86_expand_prologue to determine if we need special help before
2670 calling allocate_stack_worker. */
2672 static bool
2673 ix86_eax_live_at_start_p (void)
2675 /* Cheat. Don't bother working forward from ix86_function_regparm
2676 to the function type to whether an actual argument is located in
2677 eax. Instead just look at cfg info, which is still close enough
2678 to correct at this point. This gives false positives for broken
2679 functions that might use uninitialized data that happens to be
2680 allocated in eax, but who cares? */
2681 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2684 /* Value is the number of bytes of arguments automatically
2685 popped when returning from a subroutine call.
2686 FUNDECL is the declaration node of the function (as a tree),
2687 FUNTYPE is the data type of the function (as a tree),
2688 or for a library call it is an identifier node for the subroutine name.
2689 SIZE is the number of bytes of arguments passed on the stack.
2691 On the 80386, the RTD insn may be used to pop them if the number
2692 of args is fixed, but if the number is variable then the caller
2693 must pop them all. RTD can't be used for library calls now
2694 because the library is compiled with the Unix compiler.
2695 Use of RTD is a selectable option, since it is incompatible with
2696 standard Unix calling sequences. If the option is not selected,
2697 the caller must always pop the args.
2699 The attribute stdcall is equivalent to RTD on a per module basis. */
2702 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2704 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2706 /* Cdecl functions override -mrtd, and never pop the stack. */
2707 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2709 /* Stdcall and fastcall functions will pop the stack if not
2710 variable args. */
2711 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2712 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2713 rtd = 1;
2715 if (rtd
2716 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2717 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2718 == void_type_node)))
2719 return size;
2722 /* Lose any fake structure return argument if it is passed on the stack. */
2723 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2724 && !TARGET_64BIT
2725 && !KEEP_AGGREGATE_RETURN_POINTER)
2727 int nregs = ix86_function_regparm (funtype, fundecl);
2729 if (!nregs)
2730 return GET_MODE_SIZE (Pmode);
2733 return 0;
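/* Concrete illustration of the rules above (approximate; f is a
   hypothetical declaration): for
       int __attribute__ ((stdcall)) f (int a, int b);
   the callee pops the 8 bytes of arguments itself (a "ret $8"), so this
   function returns 8; a variadic stdcall function, or a plain cdecl
   function, returns 0 and leaves the cleanup to the caller.  */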
2736 /* Argument support functions. */
2738 /* Return true when register may be used to pass function parameters. */
2739 bool
2740 ix86_function_arg_regno_p (int regno)
2742 int i;
2743 if (!TARGET_64BIT)
2744 return (regno < REGPARM_MAX
2745 || (TARGET_MMX && MMX_REGNO_P (regno)
2746 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2747 || (TARGET_SSE && SSE_REGNO_P (regno)
2748 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2750 if (TARGET_SSE && SSE_REGNO_P (regno)
2751 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2752 return true;
2753 /* RAX is used as hidden argument to va_arg functions. */
2754 if (!regno)
2755 return true;
2756 for (i = 0; i < REGPARM_MAX; i++)
2757 if (regno == x86_64_int_parameter_registers[i])
2758 return true;
2759 return false;
2762 /* Return true if we do not know how to pass TYPE solely in registers. */
2764 static bool
2765 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2767 if (must_pass_in_stack_var_size_or_pad (mode, type))
2768 return true;
2770 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2771 The layout_type routine is crafty and tries to trick us into passing
2772 currently unsupported vector types on the stack by using TImode. */
2773 return (!TARGET_64BIT && mode == TImode
2774 && type && TREE_CODE (type) != VECTOR_TYPE);
2777 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2778 for a call to a function whose data type is FNTYPE.
2779 For a library call, FNTYPE is 0. */
2781 void
2782 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2783 tree fntype, /* tree ptr for function decl */
2784 rtx libname, /* SYMBOL_REF of library name or 0 */
2785 tree fndecl)
2787 static CUMULATIVE_ARGS zero_cum;
2788 tree param, next_param;
2790 if (TARGET_DEBUG_ARG)
2792 fprintf (stderr, "\ninit_cumulative_args (");
2793 if (fntype)
2794 fprintf (stderr, "fntype code = %s, ret code = %s",
2795 tree_code_name[(int) TREE_CODE (fntype)],
2796 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2797 else
2798 fprintf (stderr, "no fntype");
2800 if (libname)
2801 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2804 *cum = zero_cum;
2806 /* Set up the number of registers to use for passing arguments. */
2807 cum->nregs = ix86_regparm;
2808 if (TARGET_SSE)
2809 cum->sse_nregs = SSE_REGPARM_MAX;
2810 if (TARGET_MMX)
2811 cum->mmx_nregs = MMX_REGPARM_MAX;
2812 cum->warn_sse = true;
2813 cum->warn_mmx = true;
2814 cum->maybe_vaarg = false;
2816 /* Use ecx and edx registers if function has fastcall attribute,
2817 else look for regparm information. */
2818 if (fntype && !TARGET_64BIT)
2820 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2822 cum->nregs = 2;
2823 cum->fastcall = 1;
2825 else
2826 cum->nregs = ix86_function_regparm (fntype, fndecl);
2829 /* Set up the number of SSE registers used for passing SFmode
2830 and DFmode arguments. Warn for mismatching ABI. */
2831 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2833 /* Determine if this function has variable arguments. This is
2834 indicated by the last argument being 'void_type_node' if there
2835 are no variable arguments. If there are variable arguments, then
2836 we won't pass anything in registers in 32-bit mode. */
2838 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2840 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2841 param != 0; param = next_param)
2843 next_param = TREE_CHAIN (param);
2844 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2846 if (!TARGET_64BIT)
2848 cum->nregs = 0;
2849 cum->sse_nregs = 0;
2850 cum->mmx_nregs = 0;
2851 cum->warn_sse = 0;
2852 cum->warn_mmx = 0;
2853 cum->fastcall = 0;
2854 cum->float_in_sse = 0;
2856 cum->maybe_vaarg = true;
2860 if ((!fntype && !libname)
2861 || (fntype && !TYPE_ARG_TYPES (fntype)))
2862 cum->maybe_vaarg = true;
2864 if (TARGET_DEBUG_ARG)
2865 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2867 return;
2870 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2871 But in the case of vector types, it is some vector mode.
2873 When we have only some of our vector isa extensions enabled, then there
2874 are some modes for which vector_mode_supported_p is false. For these
2875 modes, the generic vector support in gcc will choose some non-vector mode
2876 in order to implement the type. By computing the natural mode, we'll
2877 select the proper ABI location for the operand and not depend on whatever
2878 the middle-end decides to do with these vector types. */
2880 static enum machine_mode
2881 type_natural_mode (tree type)
2883 enum machine_mode mode = TYPE_MODE (type);
2885 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2887 HOST_WIDE_INT size = int_size_in_bytes (type);
2888 if ((size == 8 || size == 16)
2889 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2890 && TYPE_VECTOR_SUBPARTS (type) > 1)
2892 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2894 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2895 mode = MIN_MODE_VECTOR_FLOAT;
2896 else
2897 mode = MIN_MODE_VECTOR_INT;
2899 /* Get the mode which has this inner mode and number of units. */
2900 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2901 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2902 && GET_MODE_INNER (mode) == innermode)
2903 return mode;
2905 gcc_unreachable ();
2909 return mode;
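/* Example for the helper above (a sketch; v4si is a hypothetical user
   typedef): given
       typedef int v4si __attribute__ ((vector_size (16)));
   the natural mode is V4SImode even when the matching vector extension is
   disabled and the middle end would otherwise lower the type to some
   non-vector mode; this keeps the argument's ABI slot independent of which
   vector extensions happen to be enabled.  */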
2912 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2913 this may not agree with the mode that the type system has chosen for the
2914 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2915 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2917 static rtx
2918 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2919 unsigned int regno)
2921 rtx tmp;
2923 if (orig_mode != BLKmode)
2924 tmp = gen_rtx_REG (orig_mode, regno);
2925 else
2927 tmp = gen_rtx_REG (mode, regno);
2928 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2929 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2932 return tmp;
2935 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2936 goal of this code is to classify each eightbyte (8 bytes) of an incoming argument
2937 by register class and assign registers accordingly. */
2939 /* Return the union class of CLASS1 and CLASS2.
2940 See the x86-64 PS ABI for details. */
2942 static enum x86_64_reg_class
2943 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2945 /* Rule #1: If both classes are equal, this is the resulting class. */
2946 if (class1 == class2)
2947 return class1;
2949 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2950 the other class. */
2951 if (class1 == X86_64_NO_CLASS)
2952 return class2;
2953 if (class2 == X86_64_NO_CLASS)
2954 return class1;
2956 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2957 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2958 return X86_64_MEMORY_CLASS;
2960 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2961 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2962 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2963 return X86_64_INTEGERSI_CLASS;
2964 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2965 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2966 return X86_64_INTEGER_CLASS;
2968 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2969 MEMORY is used. */
2970 if (class1 == X86_64_X87_CLASS
2971 || class1 == X86_64_X87UP_CLASS
2972 || class1 == X86_64_COMPLEX_X87_CLASS
2973 || class2 == X86_64_X87_CLASS
2974 || class2 == X86_64_X87UP_CLASS
2975 || class2 == X86_64_COMPLEX_X87_CLASS)
2976 return X86_64_MEMORY_CLASS;
2978 /* Rule #6: Otherwise class SSE is used. */
2979 return X86_64_SSE_CLASS;
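/* A few sample applications of the merge rules above: NO_CLASS merged with
   SSE yields SSE (rule #2), INTEGER merged with SSE yields INTEGER
   (rule #4), and X87 merged with SSE yields MEMORY (rule #5).  */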
2982 /* Classify the argument of type TYPE and mode MODE.
2983 CLASSES will be filled by the register class used to pass each word
2984 of the operand. The number of words is returned. In case the parameter
2985 should be passed in memory, 0 is returned. As a special case for zero
2986 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2988 BIT_OFFSET is used internally for handling records and specifies the
2989 offset of the field in bits, modulo 256, to avoid overflow cases.
2991 See the x86-64 PS ABI for details.  */
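/* Worked example for classify_argument below (a sketch of the common case;
   struct s is hypothetical and the ABI document has the authoritative
   rules): for
       struct s { double d; int i; };
   the first eightbyte is classified as SSE (SSEDF) and the second as
   INTEGER, so the struct is passed in one SSE register and one
   general-purpose register; an aggregate larger than 16 bytes is simply
   treated as MEMORY and passed on the stack.  */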
2994 static int
2995 classify_argument (enum machine_mode mode, tree type,
2996 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2998 HOST_WIDE_INT bytes =
2999 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3000 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3002 /* Variable sized entities are always passed/returned in memory. */
3003 if (bytes < 0)
3004 return 0;
3006 if (mode != VOIDmode
3007 && targetm.calls.must_pass_in_stack (mode, type))
3008 return 0;
3010 if (type && AGGREGATE_TYPE_P (type))
3012 int i;
3013 tree field;
3014 enum x86_64_reg_class subclasses[MAX_CLASSES];
3016 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3017 if (bytes > 16)
3018 return 0;
3020 for (i = 0; i < words; i++)
3021 classes[i] = X86_64_NO_CLASS;
3023 /* Zero-sized arrays or structures are NO_CLASS. Since returning 0 would
3024 signal the memory class, handle them as a special case. */
3025 if (!words)
3027 classes[0] = X86_64_NO_CLASS;
3028 return 1;
3031 /* Classify each field of record and merge classes. */
3032 switch (TREE_CODE (type))
3034 case RECORD_TYPE:
3035 /* And now merge the fields of structure. */
3036 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3038 if (TREE_CODE (field) == FIELD_DECL)
3040 int num;
3042 if (TREE_TYPE (field) == error_mark_node)
3043 continue;
3045 /* Bitfields are always classified as integer. Handle them
3046 early, since later code would consider them to be
3047 misaligned integers. */
3048 if (DECL_BIT_FIELD (field))
3050 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3051 i < ((int_bit_position (field) + (bit_offset % 64))
3052 + tree_low_cst (DECL_SIZE (field), 0)
3053 + 63) / 8 / 8; i++)
3054 classes[i] =
3055 merge_classes (X86_64_INTEGER_CLASS,
3056 classes[i]);
3058 else
3060 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3061 TREE_TYPE (field), subclasses,
3062 (int_bit_position (field)
3063 + bit_offset) % 256);
3064 if (!num)
3065 return 0;
3066 for (i = 0; i < num; i++)
3068 int pos =
3069 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3070 classes[i + pos] =
3071 merge_classes (subclasses[i], classes[i + pos]);
3076 break;
3078 case ARRAY_TYPE:
3079 /* Arrays are handled as small records. */
3081 int num;
3082 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3083 TREE_TYPE (type), subclasses, bit_offset);
3084 if (!num)
3085 return 0;
3087 /* The partial classes are now full classes. */
3088 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3089 subclasses[0] = X86_64_SSE_CLASS;
3090 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3091 subclasses[0] = X86_64_INTEGER_CLASS;
3093 for (i = 0; i < words; i++)
3094 classes[i] = subclasses[i % num];
3096 break;
3098 case UNION_TYPE:
3099 case QUAL_UNION_TYPE:
3100 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3102 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3104 if (TREE_CODE (field) == FIELD_DECL)
3106 int num;
3108 if (TREE_TYPE (field) == error_mark_node)
3109 continue;
3111 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3112 TREE_TYPE (field), subclasses,
3113 bit_offset);
3114 if (!num)
3115 return 0;
3116 for (i = 0; i < num; i++)
3117 classes[i] = merge_classes (subclasses[i], classes[i]);
3120 break;
3122 default:
3123 gcc_unreachable ();
3126 /* Final merger cleanup. */
3127 for (i = 0; i < words; i++)
3129 /* If one class is MEMORY, everything should be passed in
3130 memory. */
3131 if (classes[i] == X86_64_MEMORY_CLASS)
3132 return 0;
3134 /* The X86_64_SSEUP_CLASS should be always preceded by
3135 X86_64_SSE_CLASS. */
3136 if (classes[i] == X86_64_SSEUP_CLASS
3137 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3138 classes[i] = X86_64_SSE_CLASS;
3140 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3141 if (classes[i] == X86_64_X87UP_CLASS
3142 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3143 classes[i] = X86_64_SSE_CLASS;
3145 return words;
3148 /* Compute alignment needed. We align all types to natural boundaries with
3149 exception of XFmode that is aligned to 64bits. */
3150 if (mode != VOIDmode && mode != BLKmode)
3152 int mode_alignment = GET_MODE_BITSIZE (mode);
3154 if (mode == XFmode)
3155 mode_alignment = 128;
3156 else if (mode == XCmode)
3157 mode_alignment = 256;
3158 if (COMPLEX_MODE_P (mode))
3159 mode_alignment /= 2;
3160 /* Misaligned fields are always returned in memory. */
3161 if (bit_offset % mode_alignment)
3162 return 0;
3165 /* for V1xx modes, just use the base mode */
3166 if (VECTOR_MODE_P (mode)
3167 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3168 mode = GET_MODE_INNER (mode);
3170 /* Classification of atomic types. */
3171 switch (mode)
3173 case SDmode:
3174 case DDmode:
3175 classes[0] = X86_64_SSE_CLASS;
3176 return 1;
3177 case TDmode:
3178 classes[0] = X86_64_SSE_CLASS;
3179 classes[1] = X86_64_SSEUP_CLASS;
3180 return 2;
3181 case DImode:
3182 case SImode:
3183 case HImode:
3184 case QImode:
3185 case CSImode:
3186 case CHImode:
3187 case CQImode:
3188 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3189 classes[0] = X86_64_INTEGERSI_CLASS;
3190 else
3191 classes[0] = X86_64_INTEGER_CLASS;
3192 return 1;
3193 case CDImode:
3194 case TImode:
3195 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3196 return 2;
3197 case CTImode:
3198 return 0;
3199 case SFmode:
3200 if (!(bit_offset % 64))
3201 classes[0] = X86_64_SSESF_CLASS;
3202 else
3203 classes[0] = X86_64_SSE_CLASS;
3204 return 1;
3205 case DFmode:
3206 classes[0] = X86_64_SSEDF_CLASS;
3207 return 1;
3208 case XFmode:
3209 classes[0] = X86_64_X87_CLASS;
3210 classes[1] = X86_64_X87UP_CLASS;
3211 return 2;
3212 case TFmode:
3213 classes[0] = X86_64_SSE_CLASS;
3214 classes[1] = X86_64_SSEUP_CLASS;
3215 return 2;
3216 case SCmode:
3217 classes[0] = X86_64_SSE_CLASS;
3218 return 1;
3219 case DCmode:
3220 classes[0] = X86_64_SSEDF_CLASS;
3221 classes[1] = X86_64_SSEDF_CLASS;
3222 return 2;
3223 case XCmode:
3224 classes[0] = X86_64_COMPLEX_X87_CLASS;
3225 return 1;
3226 case TCmode:
3227 /* This mode is larger than 16 bytes. */
3228 return 0;
3229 case V4SFmode:
3230 case V4SImode:
3231 case V16QImode:
3232 case V8HImode:
3233 case V2DFmode:
3234 case V2DImode:
3235 classes[0] = X86_64_SSE_CLASS;
3236 classes[1] = X86_64_SSEUP_CLASS;
3237 return 2;
3238 case V2SFmode:
3239 case V2SImode:
3240 case V4HImode:
3241 case V8QImode:
3242 classes[0] = X86_64_SSE_CLASS;
3243 return 1;
3244 case BLKmode:
3245 case VOIDmode:
3246 return 0;
3247 default:
3248 gcc_assert (VECTOR_MODE_P (mode));
3250 if (bytes > 16)
3251 return 0;
3253 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3255 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3256 classes[0] = X86_64_INTEGERSI_CLASS;
3257 else
3258 classes[0] = X86_64_INTEGER_CLASS;
3259 classes[1] = X86_64_INTEGER_CLASS;
3260 return 1 + (bytes > 8);
3264 /* Examine the argument and return the number of registers required in each
3265 class. Return 0 iff the parameter should be passed in memory. */
3266 static int
3267 examine_argument (enum machine_mode mode, tree type, int in_return,
3268 int *int_nregs, int *sse_nregs)
3270 enum x86_64_reg_class class[MAX_CLASSES];
3271 int n = classify_argument (mode, type, class, 0);
3273 *int_nregs = 0;
3274 *sse_nregs = 0;
3275 if (!n)
3276 return 0;
3277 for (n--; n >= 0; n--)
3278 switch (class[n])
3280 case X86_64_INTEGER_CLASS:
3281 case X86_64_INTEGERSI_CLASS:
3282 (*int_nregs)++;
3283 break;
3284 case X86_64_SSE_CLASS:
3285 case X86_64_SSESF_CLASS:
3286 case X86_64_SSEDF_CLASS:
3287 (*sse_nregs)++;
3288 break;
3289 case X86_64_NO_CLASS:
3290 case X86_64_SSEUP_CLASS:
3291 break;
3292 case X86_64_X87_CLASS:
3293 case X86_64_X87UP_CLASS:
3294 if (!in_return)
3295 return 0;
3296 break;
3297 case X86_64_COMPLEX_X87_CLASS:
3298 return in_return ? 2 : 0;
3299 case X86_64_MEMORY_CLASS:
3300 gcc_unreachable ();
3302 return 1;
3305 /* Construct container for the argument used by GCC interface. See
3306 FUNCTION_ARG for the detailed description. */
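/* As a sketch of the result (illustrative only): for a hypothetical first
   argument of type struct { double d; long l; }, the classes are
   { SSEDF, INTEGER }, so the container built below is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte lives in an SSE register and the second in a
   general purpose register, with the const_ints giving byte offsets.  */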
3308 static rtx
3309 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3310 tree type, int in_return, int nintregs, int nsseregs,
3311 const int *intreg, int sse_regno)
3313 /* The following variables hold the static issued_error state. */
3314 static bool issued_sse_arg_error;
3315 static bool issued_sse_ret_error;
3316 static bool issued_x87_ret_error;
3318 enum machine_mode tmpmode;
3319 int bytes =
3320 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3321 enum x86_64_reg_class class[MAX_CLASSES];
3322 int n;
3323 int i;
3324 int nexps = 0;
3325 int needed_sseregs, needed_intregs;
3326 rtx exp[MAX_CLASSES];
3327 rtx ret;
3329 n = classify_argument (mode, type, class, 0);
3330 if (TARGET_DEBUG_ARG)
3332 if (!n)
3333 fprintf (stderr, "Memory class\n");
3334 else
3336 fprintf (stderr, "Classes:");
3337 for (i = 0; i < n; i++)
3339 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3341 fprintf (stderr, "\n");
3344 if (!n)
3345 return NULL;
3346 if (!examine_argument (mode, type, in_return, &needed_intregs,
3347 &needed_sseregs))
3348 return NULL;
3349 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3350 return NULL;
3352 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3353 some less clueful developer tries to use floating-point anyway. */
3354 if (needed_sseregs && !TARGET_SSE)
3356 if (in_return)
3358 if (!issued_sse_ret_error)
3360 error ("SSE register return with SSE disabled");
3361 issued_sse_ret_error = true;
3364 else if (!issued_sse_arg_error)
3366 error ("SSE register argument with SSE disabled");
3367 issued_sse_arg_error = true;
3369 return NULL;
3372 /* Likewise, error if the ABI requires us to return values in the
3373 x87 registers and the user specified -mno-80387. */
3374 if (!TARGET_80387 && in_return)
3375 for (i = 0; i < n; i++)
3376 if (class[i] == X86_64_X87_CLASS
3377 || class[i] == X86_64_X87UP_CLASS
3378 || class[i] == X86_64_COMPLEX_X87_CLASS)
3380 if (!issued_x87_ret_error)
3382 error ("x87 register return with x87 disabled");
3383 issued_x87_ret_error = true;
3385 return NULL;
3388 /* First construct simple cases. Avoid SCmode, since we want to use
3389 single register to pass this type. */
3390 if (n == 1 && mode != SCmode)
3391 switch (class[0])
3393 case X86_64_INTEGER_CLASS:
3394 case X86_64_INTEGERSI_CLASS:
3395 return gen_rtx_REG (mode, intreg[0]);
3396 case X86_64_SSE_CLASS:
3397 case X86_64_SSESF_CLASS:
3398 case X86_64_SSEDF_CLASS:
3399 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3400 case X86_64_X87_CLASS:
3401 case X86_64_COMPLEX_X87_CLASS:
3402 return gen_rtx_REG (mode, FIRST_STACK_REG);
3403 case X86_64_NO_CLASS:
3404 /* Zero sized array, struct or class. */
3405 return NULL;
3406 default:
3407 gcc_unreachable ();
3409 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3410 && mode != BLKmode)
3411 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3412 if (n == 2
3413 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3414 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3415 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3416 && class[1] == X86_64_INTEGER_CLASS
3417 && (mode == CDImode || mode == TImode || mode == TFmode)
3418 && intreg[0] + 1 == intreg[1])
3419 return gen_rtx_REG (mode, intreg[0]);
3421 /* Otherwise figure out the entries of the PARALLEL. */
3422 for (i = 0; i < n; i++)
3424 switch (class[i])
3426 case X86_64_NO_CLASS:
3427 break;
3428 case X86_64_INTEGER_CLASS:
3429 case X86_64_INTEGERSI_CLASS:
3430 /* Merge TImodes on aligned occasions here too. */
3431 if (i * 8 + 8 > bytes)
3432 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3433 else if (class[i] == X86_64_INTEGERSI_CLASS)
3434 tmpmode = SImode;
3435 else
3436 tmpmode = DImode;
3437 /* We've requested 24 bytes for which there is no mode. Use DImode. */
3438 if (tmpmode == BLKmode)
3439 tmpmode = DImode;
3440 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3441 gen_rtx_REG (tmpmode, *intreg),
3442 GEN_INT (i*8));
3443 intreg++;
3444 break;
3445 case X86_64_SSESF_CLASS:
3446 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3447 gen_rtx_REG (SFmode,
3448 SSE_REGNO (sse_regno)),
3449 GEN_INT (i*8));
3450 sse_regno++;
3451 break;
3452 case X86_64_SSEDF_CLASS:
3453 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3454 gen_rtx_REG (DFmode,
3455 SSE_REGNO (sse_regno)),
3456 GEN_INT (i*8));
3457 sse_regno++;
3458 break;
3459 case X86_64_SSE_CLASS:
3460 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3461 tmpmode = TImode;
3462 else
3463 tmpmode = DImode;
3464 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3465 gen_rtx_REG (tmpmode,
3466 SSE_REGNO (sse_regno)),
3467 GEN_INT (i*8));
3468 if (tmpmode == TImode)
3469 i++;
3470 sse_regno++;
3471 break;
3472 default:
3473 gcc_unreachable ();
3477 /* Empty aligned struct, union or class. */
3478 if (nexps == 0)
3479 return NULL;
3481 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3482 for (i = 0; i < nexps; i++)
3483 XVECEXP (ret, 0, i) = exp [i];
3484 return ret;
3487 /* Update the data in CUM to advance over an argument
3488 of mode MODE and data type TYPE.
3489 (TYPE is null for libcalls where that information may not be available.) */
3491 void
3492 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3493 tree type, int named)
3495 int bytes =
3496 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3497 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3499 if (type)
3500 mode = type_natural_mode (type);
3502 if (TARGET_DEBUG_ARG)
3503 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3504 "mode=%s, named=%d)\n\n",
3505 words, cum->words, cum->nregs, cum->sse_nregs,
3506 GET_MODE_NAME (mode), named);
3508 if (TARGET_64BIT)
3510 int int_nregs, sse_nregs;
3511 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3512 cum->words += words;
3513 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3515 cum->nregs -= int_nregs;
3516 cum->sse_nregs -= sse_nregs;
3517 cum->regno += int_nregs;
3518 cum->sse_regno += sse_nregs;
3520 else
3521 cum->words += words;
3523 else
3525 switch (mode)
3527 default:
3528 break;
3530 case BLKmode:
3531 if (bytes < 0)
3532 break;
3533 /* FALLTHRU */
3535 case DImode:
3536 case SImode:
3537 case HImode:
3538 case QImode:
3539 cum->words += words;
3540 cum->nregs -= words;
3541 cum->regno += words;
3543 if (cum->nregs <= 0)
3545 cum->nregs = 0;
3546 cum->regno = 0;
3548 break;
3550 case DFmode:
3551 if (cum->float_in_sse < 2)
3552 break;
3553 case SFmode:
3554 if (cum->float_in_sse < 1)
3555 break;
3556 /* FALLTHRU */
3558 case TImode:
3559 case V16QImode:
3560 case V8HImode:
3561 case V4SImode:
3562 case V2DImode:
3563 case V4SFmode:
3564 case V2DFmode:
3565 if (!type || !AGGREGATE_TYPE_P (type))
3567 cum->sse_words += words;
3568 cum->sse_nregs -= 1;
3569 cum->sse_regno += 1;
3570 if (cum->sse_nregs <= 0)
3572 cum->sse_nregs = 0;
3573 cum->sse_regno = 0;
3576 break;
3578 case V8QImode:
3579 case V4HImode:
3580 case V2SImode:
3581 case V2SFmode:
3582 if (!type || !AGGREGATE_TYPE_P (type))
3584 cum->mmx_words += words;
3585 cum->mmx_nregs -= 1;
3586 cum->mmx_regno += 1;
3587 if (cum->mmx_nregs <= 0)
3589 cum->mmx_nregs = 0;
3590 cum->mmx_regno = 0;
3593 break;
3598 /* Define where to put the arguments to a function.
3599 Value is zero to push the argument on the stack,
3600 or a hard register in which to store the argument.
3602 MODE is the argument's machine mode.
3603 TYPE is the data type of the argument (as a tree).
3604 This is null for libcalls where that information may
3605 not be available.
3606 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3607 the preceding args and about the function being called.
3608 NAMED is nonzero if this argument is a named parameter
3609 (otherwise it is an extra parameter matching an ellipsis). */
3612 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3613 tree type, int named)
3615 enum machine_mode mode = orig_mode;
3616 rtx ret = NULL_RTX;
3617 int bytes =
3618 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3619 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3620 static bool warnedsse, warnedmmx;
3622 /* To simplify the code below, represent vector types with a vector mode
3623 even if MMX/SSE are not active. */
3624 if (type && TREE_CODE (type) == VECTOR_TYPE)
3625 mode = type_natural_mode (type);
3627 /* Handle a hidden AL argument containing number of registers for varargs
3628 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3629 any AL settings. */
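/* For example (illustrative): for a call like printf ("%f\n", 1.0) the
   double is passed in %xmm0, so this hidden argument is (const_int 1)
   and the caller sets %al to 1 before the call, telling the callee's
   prologue how many SSE registers have to be saved for va_arg.  */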
3630 if (mode == VOIDmode)
3632 if (TARGET_64BIT)
3633 return GEN_INT (cum->maybe_vaarg
3634 ? (cum->sse_nregs < 0
3635 ? SSE_REGPARM_MAX
3636 : cum->sse_regno)
3637 : -1);
3638 else
3639 return constm1_rtx;
3641 if (TARGET_64BIT)
3642 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3643 cum->sse_nregs,
3644 &x86_64_int_parameter_registers [cum->regno],
3645 cum->sse_regno);
3646 else
3647 switch (mode)
3649 /* For now, pass fp/complex values on the stack. */
3650 default:
3651 break;
3653 case BLKmode:
3654 if (bytes < 0)
3655 break;
3656 /* FALLTHRU */
3657 case DImode:
3658 case SImode:
3659 case HImode:
3660 case QImode:
3661 if (words <= cum->nregs)
3663 int regno = cum->regno;
3665 /* Fastcall allocates the first two DWORD (SImode) or
3666 smaller arguments to ECX and EDX. */
3667 if (cum->fastcall)
3669 if (mode == BLKmode || mode == DImode)
3670 break;
3672 /* ECX not EAX is the first allocated register. */
3673 if (regno == 0)
3674 regno = 2;
3676 ret = gen_rtx_REG (mode, regno);
3678 break;
3679 case DFmode:
3680 if (cum->float_in_sse < 2)
3681 break;
3682 case SFmode:
3683 if (cum->float_in_sse < 1)
3684 break;
3685 /* FALLTHRU */
3686 case TImode:
3687 case V16QImode:
3688 case V8HImode:
3689 case V4SImode:
3690 case V2DImode:
3691 case V4SFmode:
3692 case V2DFmode:
3693 if (!type || !AGGREGATE_TYPE_P (type))
3695 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3697 warnedsse = true;
3698 warning (0, "SSE vector argument without SSE enabled "
3699 "changes the ABI");
3701 if (cum->sse_nregs)
3702 ret = gen_reg_or_parallel (mode, orig_mode,
3703 cum->sse_regno + FIRST_SSE_REG);
3705 break;
3706 case V8QImode:
3707 case V4HImode:
3708 case V2SImode:
3709 case V2SFmode:
3710 if (!type || !AGGREGATE_TYPE_P (type))
3712 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3714 warnedmmx = true;
3715 warning (0, "MMX vector argument without MMX enabled "
3716 "changes the ABI");
3718 if (cum->mmx_nregs)
3719 ret = gen_reg_or_parallel (mode, orig_mode,
3720 cum->mmx_regno + FIRST_MMX_REG);
3722 break;
3725 if (TARGET_DEBUG_ARG)
3727 fprintf (stderr,
3728 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3729 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3731 if (ret)
3732 print_simple_rtl (stderr, ret);
3733 else
3734 fprintf (stderr, ", stack");
3736 fprintf (stderr, " )\n");
3739 return ret;
3742 /* A C expression that indicates when an argument must be passed by
3743 reference. If nonzero for an argument, a copy of that argument is
3744 made in memory and a pointer to the argument is passed instead of
3745 the argument itself. The pointer is passed in whatever way is
3746 appropriate for passing a pointer to that type. */
3748 static bool
3749 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3750 enum machine_mode mode ATTRIBUTE_UNUSED,
3751 tree type, bool named ATTRIBUTE_UNUSED)
3753 if (!TARGET_64BIT)
3754 return 0;
3756 if (type && int_size_in_bytes (type) == -1)
3758 if (TARGET_DEBUG_ARG)
3759 fprintf (stderr, "function_arg_pass_by_reference\n");
3760 return 1;
3763 return 0;
3766 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3767 ABI. Only called if TARGET_SSE. */
3768 static bool
3769 contains_128bit_aligned_vector_p (tree type)
3771 enum machine_mode mode = TYPE_MODE (type);
3772 if (SSE_REG_MODE_P (mode)
3773 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3774 return true;
3775 if (TYPE_ALIGN (type) < 128)
3776 return false;
3778 if (AGGREGATE_TYPE_P (type))
3780 /* Walk the aggregates recursively. */
3781 switch (TREE_CODE (type))
3783 case RECORD_TYPE:
3784 case UNION_TYPE:
3785 case QUAL_UNION_TYPE:
3787 tree field;
3789 /* Walk all the structure fields. */
3790 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3792 if (TREE_CODE (field) == FIELD_DECL
3793 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3794 return true;
3796 break;
3799 case ARRAY_TYPE:
3800 /* Just for use if some languages pass arrays by value. */
3801 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3802 return true;
3803 break;
3805 default:
3806 gcc_unreachable ();
3809 return false;
3812 /* Gives the alignment boundary, in bits, of an argument with the
3813 specified mode and type. */
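/* For instance (illustrative): a plain int argument gets PARM_BOUNDARY
   (32 bits on ia32, 64 bits on x86-64), while a 16-byte vector such as a
   V4SF (__m128) argument is aligned to 128 bits; on ia32 this requires
   TARGET_SSE, otherwise everything falls back to PARM_BOUNDARY.  */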
3816 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3818 int align;
3819 if (type)
3820 align = TYPE_ALIGN (type);
3821 else
3822 align = GET_MODE_ALIGNMENT (mode);
3823 if (align < PARM_BOUNDARY)
3824 align = PARM_BOUNDARY;
3825 if (!TARGET_64BIT)
3827 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3828 make an exception for SSE modes since these require 128bit
3829 alignment.
3831 The handling here differs from field_alignment. ICC aligns MMX
3832 arguments to 4 byte boundaries, while structure fields are aligned
3833 to 8 byte boundaries. */
3834 if (!TARGET_SSE)
3835 align = PARM_BOUNDARY;
3836 else if (!type)
3838 if (!SSE_REG_MODE_P (mode))
3839 align = PARM_BOUNDARY;
3841 else
3843 if (!contains_128bit_aligned_vector_p (type))
3844 align = PARM_BOUNDARY;
3847 if (align > 128)
3848 align = 128;
3849 return align;
3852 /* Return true if N is a possible register number of function value. */
3853 bool
3854 ix86_function_value_regno_p (int regno)
3856 if (regno == 0
3857 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3858 || (regno == FIRST_SSE_REG && TARGET_SSE))
3859 return true;
3861 if (!TARGET_64BIT
3862 && (regno == FIRST_MMX_REG && TARGET_MMX))
3863 return true;
3865 return false;
3868 /* Define how to find the value returned by a function.
3869 VALTYPE is the data type of the value (as a tree).
3870 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3871 otherwise, FUNC is 0. */
3873 ix86_function_value (tree valtype, tree fntype_or_decl,
3874 bool outgoing ATTRIBUTE_UNUSED)
3876 enum machine_mode natmode = type_natural_mode (valtype);
3878 if (TARGET_64BIT)
3880 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3881 1, REGPARM_MAX, SSE_REGPARM_MAX,
3882 x86_64_int_return_registers, 0);
3883 /* For zero sized structures, construct_container returns NULL, but we
3884 need to keep the rest of the compiler happy by returning a meaningful value. */
3885 if (!ret)
3886 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3887 return ret;
3889 else
3891 tree fn = NULL_TREE, fntype;
3892 if (fntype_or_decl
3893 && DECL_P (fntype_or_decl))
3894 fn = fntype_or_decl;
3895 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3896 return gen_rtx_REG (TYPE_MODE (valtype),
3897 ix86_value_regno (natmode, fn, fntype));
3901 /* Return true iff type is returned in memory. */
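/* A few illustrative cases (based on the checks below): on x86-64 any
   aggregate larger than 16 bytes ends up in memory; on ia32 a BLKmode
   aggregate is returned in memory, an 8-byte vector is returned in %mm0
   only when MMX is available, and a 16-byte vector in %xmm0 only when
   SSE is available, falling back to memory otherwise.  */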
3903 ix86_return_in_memory (tree type)
3905 int needed_intregs, needed_sseregs, size;
3906 enum machine_mode mode = type_natural_mode (type);
3908 if (TARGET_64BIT)
3909 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3911 if (mode == BLKmode)
3912 return 1;
3914 size = int_size_in_bytes (type);
3916 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3917 return 0;
3919 if (VECTOR_MODE_P (mode) || mode == TImode)
3921 /* User-created vectors small enough to fit in EAX. */
3922 if (size < 8)
3923 return 0;
3925 /* MMX/3dNow values are returned in MM0,
3926 except when it doesn't exist. */
3927 if (size == 8)
3928 return (TARGET_MMX ? 0 : 1);
3930 /* SSE values are returned in XMM0, except when it doesn't exist. */
3931 if (size == 16)
3932 return (TARGET_SSE ? 0 : 1);
3935 if (mode == XFmode)
3936 return 0;
3938 if (mode == TDmode)
3939 return 1;
3941 if (size > 12)
3942 return 1;
3943 return 0;
3946 /* When returning SSE vector types, we have a choice of either
3947 (1) being abi incompatible with a -march switch, or
3948 (2) generating an error.
3949 Given no good solution, I think the safest thing is one warning.
3950 The user won't be able to use -Werror, but....
3952 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3953 called in response to actually generating a caller or callee that
3954 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3955 via aggregate_value_p for general type probing from tree-ssa. */
3957 static rtx
3958 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3960 static bool warnedsse, warnedmmx;
3962 if (type)
3964 /* Look at the return type of the function, not the function type. */
3965 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3967 if (!TARGET_SSE && !warnedsse)
3969 if (mode == TImode
3970 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3972 warnedsse = true;
3973 warning (0, "SSE vector return without SSE enabled "
3974 "changes the ABI");
3978 if (!TARGET_MMX && !warnedmmx)
3980 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3982 warnedmmx = true;
3983 warning (0, "MMX vector return without MMX enabled "
3984 "changes the ABI");
3989 return NULL;
3992 /* Define how to find the value returned by a library function
3993 assuming the value has mode MODE. */
3995 ix86_libcall_value (enum machine_mode mode)
3997 if (TARGET_64BIT)
3999 switch (mode)
4001 case SFmode:
4002 case SCmode:
4003 case DFmode:
4004 case DCmode:
4005 case TFmode:
4006 case SDmode:
4007 case DDmode:
4008 case TDmode:
4009 return gen_rtx_REG (mode, FIRST_SSE_REG);
4010 case XFmode:
4011 case XCmode:
4012 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4013 case TCmode:
4014 return NULL;
4015 default:
4016 return gen_rtx_REG (mode, 0);
4019 else
4020 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4023 /* Given a mode, return the register to use for a return value. */
4025 static int
4026 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4028 gcc_assert (!TARGET_64BIT);
4030 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4031 we normally prevent this case when mmx is not available. However
4032 some ABIs may require the result to be returned like DImode. */
4033 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4034 return TARGET_MMX ? FIRST_MMX_REG : 0;
4036 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4037 we prevent this case when sse is not available. However some ABIs
4038 may require the result to be returned like integer TImode. */
4039 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4040 return TARGET_SSE ? FIRST_SSE_REG : 0;
4042 /* Decimal floating point values can go in %eax, unlike other float modes. */
4043 if (DECIMAL_FLOAT_MODE_P (mode))
4044 return 0;
4046 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4047 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4048 return 0;
4050 /* Floating point return values in %st(0), except for local functions when
4051 SSE math is enabled or for functions with sseregparm attribute. */
4052 if ((func || fntype)
4053 && (mode == SFmode || mode == DFmode))
4055 int sse_level = ix86_function_sseregparm (fntype, func);
4056 if ((sse_level >= 1 && mode == SFmode)
4057 || (sse_level == 2 && mode == DFmode))
4058 return FIRST_SSE_REG;
4061 return FIRST_FLOAT_REG;
4064 /* Create the va_list data type. */
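/* On x86-64 the record built below corresponds roughly to (an
   illustrative sketch; see the x86-64 psABI for the normative layout):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   gp_offset and fp_offset index into reg_save_area for general purpose
   and SSE registers respectively, and overflow_arg_area points at the
   next stack-passed argument.  On i386 va_list is just a plain pointer.  */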
4066 static tree
4067 ix86_build_builtin_va_list (void)
4069 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4071 /* For i386 we use plain pointer to argument area. */
4072 if (!TARGET_64BIT)
4073 return build_pointer_type (char_type_node);
4075 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4076 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4078 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4079 unsigned_type_node);
4080 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4081 unsigned_type_node);
4082 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4083 ptr_type_node);
4084 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4085 ptr_type_node);
4087 va_list_gpr_counter_field = f_gpr;
4088 va_list_fpr_counter_field = f_fpr;
4090 DECL_FIELD_CONTEXT (f_gpr) = record;
4091 DECL_FIELD_CONTEXT (f_fpr) = record;
4092 DECL_FIELD_CONTEXT (f_ovf) = record;
4093 DECL_FIELD_CONTEXT (f_sav) = record;
4095 TREE_CHAIN (record) = type_decl;
4096 TYPE_NAME (record) = type_decl;
4097 TYPE_FIELDS (record) = f_gpr;
4098 TREE_CHAIN (f_gpr) = f_fpr;
4099 TREE_CHAIN (f_fpr) = f_ovf;
4100 TREE_CHAIN (f_ovf) = f_sav;
4102 layout_type (record);
4104 /* The correct type is an array type of one element. */
4105 return build_array_type (record, build_index_type (size_zero_node));
4108 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
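/* The register save area filled in below is laid out (roughly, assuming
   the usual REGPARM_MAX of 6 and SSE_REGPARM_MAX of 8) as 6 * 8 bytes of
   general purpose registers (rdi, rsi, rdx, rcx, r8, r9) followed by
   8 * 16 bytes of SSE registers (xmm0-xmm7), i.e. 48 + 128 bytes total;
   gp_offset and fp_offset in the va_list index into this block.  */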
4110 static void
4111 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4112 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4113 int no_rtl)
4115 CUMULATIVE_ARGS next_cum;
4116 rtx save_area = NULL_RTX, mem;
4117 rtx label;
4118 rtx label_ref;
4119 rtx tmp_reg;
4120 rtx nsse_reg;
4121 int set;
4122 tree fntype;
4123 int stdarg_p;
4124 int i;
4126 if (!TARGET_64BIT)
4127 return;
4129 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4130 return;
4132 /* Indicate to allocate space on the stack for varargs save area. */
4133 ix86_save_varrargs_registers = 1;
4135 cfun->stack_alignment_needed = 128;
4137 fntype = TREE_TYPE (current_function_decl);
4138 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4139 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4140 != void_type_node));
4142 /* For varargs, we do not want to skip the dummy va_dcl argument.
4143 For stdargs, we do want to skip the last named argument. */
4144 next_cum = *cum;
4145 if (stdarg_p)
4146 function_arg_advance (&next_cum, mode, type, 1);
4148 if (!no_rtl)
4149 save_area = frame_pointer_rtx;
4151 set = get_varargs_alias_set ();
4153 for (i = next_cum.regno;
4154 i < ix86_regparm
4155 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4156 i++)
4158 mem = gen_rtx_MEM (Pmode,
4159 plus_constant (save_area, i * UNITS_PER_WORD));
4160 MEM_NOTRAP_P (mem) = 1;
4161 set_mem_alias_set (mem, set);
4162 emit_move_insn (mem, gen_rtx_REG (Pmode,
4163 x86_64_int_parameter_registers[i]));
4166 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4168 /* Now emit code to save SSE registers. The AX parameter contains the
4169 number of SSE parameter registers used to call this function. We use
4170 the sse_prologue_save insn template, which produces a computed jump
4171 across the SSE saves. We need some preparation work to get this working. */
4173 label = gen_label_rtx ();
4174 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4176 /* Compute address to jump to :
4177 label - 5*eax + nnamed_sse_arguments*5 */
4178 tmp_reg = gen_reg_rtx (Pmode);
4179 nsse_reg = gen_reg_rtx (Pmode);
4180 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4181 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4182 gen_rtx_MULT (Pmode, nsse_reg,
4183 GEN_INT (4))));
4184 if (next_cum.sse_regno)
4185 emit_move_insn
4186 (nsse_reg,
4187 gen_rtx_CONST (DImode,
4188 gen_rtx_PLUS (DImode,
4189 label_ref,
4190 GEN_INT (next_cum.sse_regno * 4))));
4191 else
4192 emit_move_insn (nsse_reg, label_ref);
4193 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4195 /* Compute the address of the memory block we save into. We always use a
4196 pointer pointing 127 bytes after the first byte to store - this is
4197 needed to keep the instruction size limited to 4 bytes. */
4198 tmp_reg = gen_reg_rtx (Pmode);
4199 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4200 plus_constant (save_area,
4201 8 * REGPARM_MAX + 127)));
4202 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4203 MEM_NOTRAP_P (mem) = 1;
4204 set_mem_alias_set (mem, set);
4205 set_mem_align (mem, BITS_PER_WORD);
4207 /* And finally do the dirty job! */
4208 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4209 GEN_INT (next_cum.sse_regno), label));
4214 /* Implement va_start. */
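/* Illustrative effect of the code below: for a function such as
   int f (const char *fmt, ...) where fmt arrives in %rdi and no SSE
   registers are used by the named arguments, va_start sets
   gp_offset = 8 and fp_offset = 48 (8 * REGPARM_MAX), points
   overflow_arg_area at the first stack-passed argument, and points
   reg_save_area at the block saved by the prologue.  */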
4216 void
4217 ix86_va_start (tree valist, rtx nextarg)
4219 HOST_WIDE_INT words, n_gpr, n_fpr;
4220 tree f_gpr, f_fpr, f_ovf, f_sav;
4221 tree gpr, fpr, ovf, sav, t;
4222 tree type;
4224 /* Only 64bit target needs something special. */
4225 if (!TARGET_64BIT)
4227 std_expand_builtin_va_start (valist, nextarg);
4228 return;
4231 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4232 f_fpr = TREE_CHAIN (f_gpr);
4233 f_ovf = TREE_CHAIN (f_fpr);
4234 f_sav = TREE_CHAIN (f_ovf);
4236 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4237 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4238 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4239 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4240 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4242 /* Count number of gp and fp argument registers used. */
4243 words = current_function_args_info.words;
4244 n_gpr = current_function_args_info.regno;
4245 n_fpr = current_function_args_info.sse_regno;
4247 if (TARGET_DEBUG_ARG)
4248 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4249 (int) words, (int) n_gpr, (int) n_fpr);
4251 if (cfun->va_list_gpr_size)
4253 type = TREE_TYPE (gpr);
4254 t = build2 (MODIFY_EXPR, type, gpr,
4255 build_int_cst (type, n_gpr * 8));
4256 TREE_SIDE_EFFECTS (t) = 1;
4257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4260 if (cfun->va_list_fpr_size)
4262 type = TREE_TYPE (fpr);
4263 t = build2 (MODIFY_EXPR, type, fpr,
4264 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4265 TREE_SIDE_EFFECTS (t) = 1;
4266 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4269 /* Find the overflow area. */
4270 type = TREE_TYPE (ovf);
4271 t = make_tree (type, virtual_incoming_args_rtx);
4272 if (words != 0)
4273 t = build2 (PLUS_EXPR, type, t,
4274 build_int_cst (type, words * UNITS_PER_WORD));
4275 t = build2 (MODIFY_EXPR, type, ovf, t);
4276 TREE_SIDE_EFFECTS (t) = 1;
4277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4279 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4281 /* Find the register save area.
4282 The function prologue saves it right above the stack frame. */
4283 type = TREE_TYPE (sav);
4284 t = make_tree (type, frame_pointer_rtx);
4285 t = build2 (MODIFY_EXPR, type, sav, t);
4286 TREE_SIDE_EFFECTS (t) = 1;
4287 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4291 /* Implement va_arg. */
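/* For a scalar integer argument the gimple built below behaves roughly
   like the following pseudo C (illustrative only; 48 bytes is the size
   of the GPR part of the register save area):

     if (ap->gp_offset >= 48)
       goto use_overflow_area;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;

   The overflow path reads from overflow_arg_area and bumps it instead;
   SSE-classed values use fp_offset and 16-byte slots the same way.  */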
4293 tree
4294 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4296 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4297 tree f_gpr, f_fpr, f_ovf, f_sav;
4298 tree gpr, fpr, ovf, sav, t;
4299 int size, rsize;
4300 tree lab_false, lab_over = NULL_TREE;
4301 tree addr, t2;
4302 rtx container;
4303 int indirect_p = 0;
4304 tree ptrtype;
4305 enum machine_mode nat_mode;
4307 /* Only 64bit target needs something special. */
4308 if (!TARGET_64BIT)
4309 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4311 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4312 f_fpr = TREE_CHAIN (f_gpr);
4313 f_ovf = TREE_CHAIN (f_fpr);
4314 f_sav = TREE_CHAIN (f_ovf);
4316 valist = build_va_arg_indirect_ref (valist);
4317 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4318 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4319 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4320 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4322 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4323 if (indirect_p)
4324 type = build_pointer_type (type);
4325 size = int_size_in_bytes (type);
4326 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4328 nat_mode = type_natural_mode (type);
4329 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4330 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4332 /* Pull the value out of the saved registers. */
4334 addr = create_tmp_var (ptr_type_node, "addr");
4335 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4337 if (container)
4339 int needed_intregs, needed_sseregs;
4340 bool need_temp;
4341 tree int_addr, sse_addr;
4343 lab_false = create_artificial_label ();
4344 lab_over = create_artificial_label ();
4346 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4348 need_temp = (!REG_P (container)
4349 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4350 || TYPE_ALIGN (type) > 128));
4352 /* In case we are passing a structure, verify that it forms a consecutive
4353 block in the register save area. If not, we need to do moves. */
4354 if (!need_temp && !REG_P (container))
4356 /* Verify that all registers are strictly consecutive.  */
4357 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4359 int i;
4361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4363 rtx slot = XVECEXP (container, 0, i);
4364 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4365 || INTVAL (XEXP (slot, 1)) != i * 16)
4366 need_temp = 1;
4369 else
4371 int i;
4373 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4375 rtx slot = XVECEXP (container, 0, i);
4376 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4377 || INTVAL (XEXP (slot, 1)) != i * 8)
4378 need_temp = 1;
4382 if (!need_temp)
4384 int_addr = addr;
4385 sse_addr = addr;
4387 else
4389 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4390 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4391 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4392 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4395 /* First ensure that we fit completely in registers. */
4396 if (needed_intregs)
4398 t = build_int_cst (TREE_TYPE (gpr),
4399 (REGPARM_MAX - needed_intregs + 1) * 8);
4400 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4401 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4402 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4403 gimplify_and_add (t, pre_p);
4405 if (needed_sseregs)
4407 t = build_int_cst (TREE_TYPE (fpr),
4408 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4409 + REGPARM_MAX * 8);
4410 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4411 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4412 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4413 gimplify_and_add (t, pre_p);
4416 /* Compute index to start of area used for integer regs. */
4417 if (needed_intregs)
4419 /* int_addr = gpr + sav; */
4420 t = fold_convert (ptr_type_node, gpr);
4421 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4422 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4423 gimplify_and_add (t, pre_p);
4425 if (needed_sseregs)
4427 /* sse_addr = fpr + sav; */
4428 t = fold_convert (ptr_type_node, fpr);
4429 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4430 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4431 gimplify_and_add (t, pre_p);
4433 if (need_temp)
4435 int i;
4436 tree temp = create_tmp_var (type, "va_arg_tmp");
4438 /* addr = &temp; */
4439 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4440 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4441 gimplify_and_add (t, pre_p);
4443 for (i = 0; i < XVECLEN (container, 0); i++)
4445 rtx slot = XVECEXP (container, 0, i);
4446 rtx reg = XEXP (slot, 0);
4447 enum machine_mode mode = GET_MODE (reg);
4448 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4449 tree addr_type = build_pointer_type (piece_type);
4450 tree src_addr, src;
4451 int src_offset;
4452 tree dest_addr, dest;
4454 if (SSE_REGNO_P (REGNO (reg)))
4456 src_addr = sse_addr;
4457 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4459 else
4461 src_addr = int_addr;
4462 src_offset = REGNO (reg) * 8;
4464 src_addr = fold_convert (addr_type, src_addr);
4465 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4466 size_int (src_offset)));
4467 src = build_va_arg_indirect_ref (src_addr);
4469 dest_addr = fold_convert (addr_type, addr);
4470 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4471 size_int (INTVAL (XEXP (slot, 1)))));
4472 dest = build_va_arg_indirect_ref (dest_addr);
4474 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4475 gimplify_and_add (t, pre_p);
4479 if (needed_intregs)
4481 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4482 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4483 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4484 gimplify_and_add (t, pre_p);
4486 if (needed_sseregs)
4488 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4489 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4490 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4491 gimplify_and_add (t, pre_p);
4494 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4495 gimplify_and_add (t, pre_p);
4497 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4498 append_to_statement_list (t, pre_p);
4501 /* ... otherwise out of the overflow area. */
4503 /* Care for on-stack alignment if needed. */
4504 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4505 || integer_zerop (TYPE_SIZE (type)))
4506 t = ovf;
4507 else
4509 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4510 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4511 build_int_cst (TREE_TYPE (ovf), align - 1));
4512 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4513 build_int_cst (TREE_TYPE (t), -align));
4515 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4517 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4518 gimplify_and_add (t2, pre_p);
4520 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4521 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4522 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4523 gimplify_and_add (t, pre_p);
4525 if (container)
4527 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4528 append_to_statement_list (t, pre_p);
4531 ptrtype = build_pointer_type (type);
4532 addr = fold_convert (ptrtype, addr);
4534 if (indirect_p)
4535 addr = build_va_arg_indirect_ref (addr);
4536 return build_va_arg_indirect_ref (addr);
4539 /* Return nonzero if OPNUM's MEM should be matched
4540 in movabs* patterns. */
4543 ix86_check_movabs (rtx insn, int opnum)
4545 rtx set, mem;
4547 set = PATTERN (insn);
4548 if (GET_CODE (set) == PARALLEL)
4549 set = XVECEXP (set, 0, 0);
4550 gcc_assert (GET_CODE (set) == SET);
4551 mem = XEXP (set, opnum);
4552 while (GET_CODE (mem) == SUBREG)
4553 mem = SUBREG_REG (mem);
4554 gcc_assert (GET_CODE (mem) == MEM);
4555 return (volatile_ok || !MEM_VOLATILE_P (mem));
4558 /* Initialize the table of extra 80387 mathematical constants. */
4560 static void
4561 init_ext_80387_constants (void)
4563 static const char * cst[5] =
4565 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4566 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4567 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4568 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4569 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4571 int i;
4573 for (i = 0; i < 5; i++)
4575 real_from_string (&ext_80387_constants_table[i], cst[i]);
4576 /* Ensure each constant is rounded to XFmode precision. */
4577 real_convert (&ext_80387_constants_table[i],
4578 XFmode, &ext_80387_constants_table[i]);
4581 ext_80387_constants_init = 1;
4584 /* Return true if the constant is something that can be loaded with
4585 a special instruction. */
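/* The return value encodes which sequence to use (see
   standard_80387_constant_opcode below): 0 means no special instruction,
   1 is fldz (0.0), 2 is fld1 (1.0), 3..7 select fldlg2, fldln2, fldl2e,
   fldl2t and fldpi, and 8/9 stand for -0.0 and -1.0, which are split
   into fldz;fchs and fld1;fchs respectively.  */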
4588 standard_80387_constant_p (rtx x)
4590 REAL_VALUE_TYPE r;
4592 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4593 return -1;
4595 if (x == CONST0_RTX (GET_MODE (x)))
4596 return 1;
4597 if (x == CONST1_RTX (GET_MODE (x)))
4598 return 2;
4600 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4602 /* For XFmode constants, try to find a special 80387 instruction when
4603 optimizing for size or on those CPUs that benefit from them. */
4604 if (GET_MODE (x) == XFmode
4605 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4607 int i;
4609 if (! ext_80387_constants_init)
4610 init_ext_80387_constants ();
4612 for (i = 0; i < 5; i++)
4613 if (real_identical (&r, &ext_80387_constants_table[i]))
4614 return i + 3;
4617 /* A load of the constant -0.0 or -1.0 will be split into an
4618 fldz;fchs or fld1;fchs sequence. */
4619 if (real_isnegzero (&r))
4620 return 8;
4621 if (real_identical (&r, &dconstm1))
4622 return 9;
4624 return 0;
4627 /* Return the opcode of the special instruction to be used to load
4628 the constant X. */
4630 const char *
4631 standard_80387_constant_opcode (rtx x)
4633 switch (standard_80387_constant_p (x))
4635 case 1:
4636 return "fldz";
4637 case 2:
4638 return "fld1";
4639 case 3:
4640 return "fldlg2";
4641 case 4:
4642 return "fldln2";
4643 case 5:
4644 return "fldl2e";
4645 case 6:
4646 return "fldl2t";
4647 case 7:
4648 return "fldpi";
4649 case 8:
4650 case 9:
4651 return "#";
4652 default:
4653 gcc_unreachable ();
4657 /* Return the CONST_DOUBLE representing the 80387 constant that is
4658 loaded by the specified special instruction. The argument IDX
4659 matches the return value from standard_80387_constant_p. */
4662 standard_80387_constant_rtx (int idx)
4664 int i;
4666 if (! ext_80387_constants_init)
4667 init_ext_80387_constants ();
4669 switch (idx)
4671 case 3:
4672 case 4:
4673 case 5:
4674 case 6:
4675 case 7:
4676 i = idx - 3;
4677 break;
4679 default:
4680 gcc_unreachable ();
4683 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4684 XFmode);
4687 /* Return 1 if mode is a valid mode for sse. */
4688 static int
4689 standard_sse_mode_p (enum machine_mode mode)
4691 switch (mode)
4693 case V16QImode:
4694 case V8HImode:
4695 case V4SImode:
4696 case V2DImode:
4697 case V4SFmode:
4698 case V2DFmode:
4699 return 1;
4701 default:
4702 return 0;
4706 /* Return 1 if X is an FP constant we can load to an SSE register w/o using memory.  */
4709 standard_sse_constant_p (rtx x)
4711 enum machine_mode mode = GET_MODE (x);
4713 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4714 return 1;
4715 if (vector_all_ones_operand (x, mode)
4716 && standard_sse_mode_p (mode))
4717 return TARGET_SSE2 ? 2 : -1;
4719 return 0;
4722 /* Return the opcode of the special instruction to be used to load
4723 the constant X. */
4725 const char *
4726 standard_sse_constant_opcode (rtx insn, rtx x)
4728 switch (standard_sse_constant_p (x))
4730 case 1:
4731 if (get_attr_mode (insn) == MODE_V4SF)
4732 return "xorps\t%0, %0";
4733 else if (get_attr_mode (insn) == MODE_V2DF)
4734 return "xorpd\t%0, %0";
4735 else
4736 return "pxor\t%0, %0";
4737 case 2:
4738 return "pcmpeqd\t%0, %0";
4740 gcc_unreachable ();
4743 /* Returns 1 if OP contains a symbol reference.  */
4746 symbolic_reference_mentioned_p (rtx op)
4748 const char *fmt;
4749 int i;
4751 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4752 return 1;
4754 fmt = GET_RTX_FORMAT (GET_CODE (op));
4755 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4757 if (fmt[i] == 'E')
4759 int j;
4761 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4762 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4763 return 1;
4766 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4767 return 1;
4770 return 0;
4773 /* Return 1 if it is appropriate to emit `ret' instructions in the
4774 body of a function. Do this only if the epilogue is simple, needing a
4775 couple of insns. Prior to reloading, we can't tell how many registers
4776 must be saved, so return 0 then. Return 0 if there is no frame
4777 marker to de-allocate. */
4780 ix86_can_use_return_insn_p (void)
4782 struct ix86_frame frame;
4784 if (! reload_completed || frame_pointer_needed)
4785 return 0;
4787 /* Don't allow more than 32k bytes of pop, since that's all we can do
4788 with one instruction. */
4789 if (current_function_pops_args
4790 && current_function_args_size >= 32768)
4791 return 0;
4793 ix86_compute_frame_layout (&frame);
4794 return frame.to_allocate == 0 && frame.nregs == 0;
4797 /* Value should be nonzero if functions must have frame pointers.
4798 Zero means the frame pointer need not be set up (and parms may
4799 be accessed via the stack pointer) in functions that seem suitable. */
4802 ix86_frame_pointer_required (void)
4804 /* If we accessed previous frames, then the generated code expects
4805 to be able to access the saved ebp value in our frame. */
4806 if (cfun->machine->accesses_prev_frame)
4807 return 1;
4809 /* Several x86 os'es need a frame pointer for other reasons,
4810 usually pertaining to setjmp. */
4811 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4812 return 1;
4814 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4815 the frame pointer by default. Turn it back on now if we've not
4816 got a leaf function. */
4817 if (TARGET_OMIT_LEAF_FRAME_POINTER
4818 && (!current_function_is_leaf
4819 || ix86_current_function_calls_tls_descriptor))
4820 return 1;
4822 if (current_function_profile)
4823 return 1;
4825 return 0;
4828 /* Record that the current function accesses previous call frames. */
4830 void
4831 ix86_setup_frame_addresses (void)
4833 cfun->machine->accesses_prev_frame = 1;
4836 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4837 # define USE_HIDDEN_LINKONCE 1
4838 #else
4839 # define USE_HIDDEN_LINKONCE 0
4840 #endif
4842 static int pic_labels_used;
4844 /* Fills in the label name that should be used for a pc thunk for
4845 the given register. */
4847 static void
4848 get_pc_thunk_name (char name[32], unsigned int regno)
4850 gcc_assert (!TARGET_64BIT);
4852 if (USE_HIDDEN_LINKONCE)
4853 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4854 else
4855 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4859 /* This function generates code for -fpic that loads the requested register
4860 with the return address of the caller and then returns. */
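/* For instance, when %ebx is the destination and hidden linkonce sections
   are usable, the thunk emitted below looks roughly like (AT&T syntax,
   illustrative):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   which leaves the caller's return address, i.e. the address of the
   instruction following the call, in %ebx.  */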
4862 void
4863 ix86_file_end (void)
4865 rtx xops[2];
4866 int regno;
4868 for (regno = 0; regno < 8; ++regno)
4870 char name[32];
4872 if (! ((pic_labels_used >> regno) & 1))
4873 continue;
4875 get_pc_thunk_name (name, regno);
4877 #if TARGET_MACHO
4878 if (TARGET_MACHO)
4880 switch_to_section (darwin_sections[text_coal_section]);
4881 fputs ("\t.weak_definition\t", asm_out_file);
4882 assemble_name (asm_out_file, name);
4883 fputs ("\n\t.private_extern\t", asm_out_file);
4884 assemble_name (asm_out_file, name);
4885 fputs ("\n", asm_out_file);
4886 ASM_OUTPUT_LABEL (asm_out_file, name);
4888 else
4889 #endif
4890 if (USE_HIDDEN_LINKONCE)
4892 tree decl;
4894 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4895 error_mark_node);
4896 TREE_PUBLIC (decl) = 1;
4897 TREE_STATIC (decl) = 1;
4898 DECL_ONE_ONLY (decl) = 1;
4900 (*targetm.asm_out.unique_section) (decl, 0);
4901 switch_to_section (get_named_section (decl, NULL, 0));
4903 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4904 fputs ("\t.hidden\t", asm_out_file);
4905 assemble_name (asm_out_file, name);
4906 fputc ('\n', asm_out_file);
4907 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4909 else
4911 switch_to_section (text_section);
4912 ASM_OUTPUT_LABEL (asm_out_file, name);
4915 xops[0] = gen_rtx_REG (SImode, regno);
4916 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4917 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4918 output_asm_insn ("ret", xops);
4921 if (NEED_INDICATE_EXEC_STACK)
4922 file_end_indicate_exec_stack ();
4925 /* Emit code for the SET_GOT patterns. */
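/* The sequences produced below look roughly like this (AT&T syntax,
   illustrative; %ebx taken as the destination and .L2 as an arbitrary
   label):

     with -fpic and TARGET_DEEP_BRANCH_PREDICTION:
             call    __i686.get_pc_thunk.bx
             addl    $_GLOBAL_OFFSET_TABLE_, %ebx

     with -fpic but without deep branch prediction:
             call    .L2
     .L2:    popl    %ebx
             addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */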
4927 const char *
4928 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4930 rtx xops[3];
4932 xops[0] = dest;
4933 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4935 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4937 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4939 if (!flag_pic)
4940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4941 else
4942 output_asm_insn ("call\t%a2", xops);
4944 #if TARGET_MACHO
4945 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4946 is what will be referenced by the Mach-O PIC subsystem. */
4947 if (!label)
4948 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4949 #endif
4951 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4952 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4954 if (flag_pic)
4955 output_asm_insn ("pop{l}\t%0", xops);
4957 else
4959 char name[32];
4960 get_pc_thunk_name (name, REGNO (dest));
4961 pic_labels_used |= 1 << REGNO (dest);
4963 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4964 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4965 output_asm_insn ("call\t%X2", xops);
4966 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4967 is what will be referenced by the Mach-O PIC subsystem. */
4968 #if TARGET_MACHO
4969 if (!label)
4970 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4971 else
4972 targetm.asm_out.internal_label (asm_out_file, "L",
4973 CODE_LABEL_NUMBER (label));
4974 #endif
4977 if (TARGET_MACHO)
4978 return "";
4980 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4981 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4982 else
4983 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4985 return "";
4988 /* Generate an "push" pattern for input ARG. */
4990 static rtx
4991 gen_push (rtx arg)
4993 return gen_rtx_SET (VOIDmode,
4994 gen_rtx_MEM (Pmode,
4995 gen_rtx_PRE_DEC (Pmode,
4996 stack_pointer_rtx)),
4997 arg);
5000 /* Return >= 0 if there is an unused call-clobbered register available
5001 for the entire function. */
5003 static unsigned int
5004 ix86_select_alt_pic_regnum (void)
5006 if (current_function_is_leaf && !current_function_profile
5007 && !ix86_current_function_calls_tls_descriptor)
5009 int i;
5010 for (i = 2; i >= 0; --i)
5011 if (!regs_ever_live[i])
5012 return i;
5015 return INVALID_REGNUM;
5018 /* Return 1 if we need to save REGNO. */
5019 static int
5020 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5022 if (pic_offset_table_rtx
5023 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5024 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5025 || current_function_profile
5026 || current_function_calls_eh_return
5027 || current_function_uses_const_pool))
5029 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5030 return 0;
5031 return 1;
5034 if (current_function_calls_eh_return && maybe_eh_return)
5036 unsigned i;
5037 for (i = 0; ; i++)
5039 unsigned test = EH_RETURN_DATA_REGNO (i);
5040 if (test == INVALID_REGNUM)
5041 break;
5042 if (test == regno)
5043 return 1;
5047 if (cfun->machine->force_align_arg_pointer
5048 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5049 return 1;
5051 return (regs_ever_live[regno]
5052 && !call_used_regs[regno]
5053 && !fixed_regs[regno]
5054 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5057 /* Return number of registers to be saved on the stack. */
5059 static int
5060 ix86_nsaved_regs (void)
5062 int nregs = 0;
5063 int regno;
5065 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5066 if (ix86_save_reg (regno, true))
5067 nregs++;
5068 return nregs;
5071 /* Return the offset between two registers, one to be eliminated, and the other
5072 its replacement, at the start of a routine. */
5074 HOST_WIDE_INT
5075 ix86_initial_elimination_offset (int from, int to)
5077 struct ix86_frame frame;
5078 ix86_compute_frame_layout (&frame);
5080 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5081 return frame.hard_frame_pointer_offset;
5082 else if (from == FRAME_POINTER_REGNUM
5083 && to == HARD_FRAME_POINTER_REGNUM)
5084 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5085 else
5087 gcc_assert (to == STACK_POINTER_REGNUM);
5089 if (from == ARG_POINTER_REGNUM)
5090 return frame.stack_pointer_offset;
5092 gcc_assert (from == FRAME_POINTER_REGNUM);
5093 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5097 /* Fill structure ix86_frame describing the frame of the currently compiled function. */
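/* Roughly, the frame laid out below looks like this, from higher to
   lower addresses (an illustrative picture; the authoritative values are
   the offsets computed in the code):

        return address
        saved frame pointer         (only if frame_pointer_needed)
        saved registers             (nregs * UNITS_PER_WORD)
        va-arg save area            (only if ix86_save_varrargs_registers)
        padding1                    (align the local frame)
        local variables             <- frame_pointer_offset points here
        outgoing arguments area
        padding2                    (align to preferred boundary)
                                    <- stack_pointer_offset points here  */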
5099 static void
5100 ix86_compute_frame_layout (struct ix86_frame *frame)
5102 HOST_WIDE_INT total_size;
5103 unsigned int stack_alignment_needed;
5104 HOST_WIDE_INT offset;
5105 unsigned int preferred_alignment;
5106 HOST_WIDE_INT size = get_frame_size ();
5108 frame->nregs = ix86_nsaved_regs ();
5109 total_size = size;
5111 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5112 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5114 /* During reload the number of registers saved can change. Recompute the
5115 value as needed. Do not recompute when the number of registers did not
5116 change, as reload calls this function multiple times and does not expect
5117 the decision to change within a single iteration. */
5118 if (!optimize_size
5119 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5121 int count = frame->nregs;
5123 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5124 /* The fast prologue uses move instead of push to save registers. This
5125 is significantly longer, but also executes faster as modern hardware
5126 can execute the moves in parallel, but can't do that for push/pop.
5128 Be careful about choosing which prologue to emit: when the function takes
5129 many instructions to execute, we may use the slow version, and likewise
5130 when the function is known to be outside a hot spot (this is known only
5131 with profile feedback). Weight the size of the function by the number of
5132 registers to save, as it is cheap to use one or two push instructions but
5133 very slow to use many of them. */
5134 if (count)
5135 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5136 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5137 || (flag_branch_probabilities
5138 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5139 cfun->machine->use_fast_prologue_epilogue = false;
5140 else
5141 cfun->machine->use_fast_prologue_epilogue
5142 = !expensive_function_p (count);
5144 if (TARGET_PROLOGUE_USING_MOVE
5145 && cfun->machine->use_fast_prologue_epilogue)
5146 frame->save_regs_using_mov = true;
5147 else
5148 frame->save_regs_using_mov = false;
5151 /* Skip return address and saved base pointer. */
5152 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5154 frame->hard_frame_pointer_offset = offset;
5156 /* Do some sanity checking of stack_alignment_needed and
5157 preferred_alignment, since i386 port is the only using those features
5158 that may break easily. */
5160 gcc_assert (!size || stack_alignment_needed);
5161 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5162 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5163 gcc_assert (stack_alignment_needed
5164 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5166 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5167 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5169 /* Register save area */
5170 offset += frame->nregs * UNITS_PER_WORD;
5172 /* Va-arg area */
5173 if (ix86_save_varrargs_registers)
5175 offset += X86_64_VARARGS_SIZE;
5176 frame->va_arg_size = X86_64_VARARGS_SIZE;
5178 else
5179 frame->va_arg_size = 0;
5181 /* Align start of frame for local function. */
5182 frame->padding1 = ((offset + stack_alignment_needed - 1)
5183 & -stack_alignment_needed) - offset;
5185 offset += frame->padding1;
5187 /* Frame pointer points here. */
5188 frame->frame_pointer_offset = offset;
5190 offset += size;
5192 /* Add outgoing arguments area. Can be skipped if we eliminated
5193 all the function calls as dead code.
5194 Skipping is however impossible when the function calls alloca. The alloca
5195 expander assumes that the last current_function_outgoing_args_size bytes
5196 of the stack frame are unused. */
5197 if (ACCUMULATE_OUTGOING_ARGS
5198 && (!current_function_is_leaf || current_function_calls_alloca
5199 || ix86_current_function_calls_tls_descriptor))
5201 offset += current_function_outgoing_args_size;
5202 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5204 else
5205 frame->outgoing_arguments_size = 0;
5207 /* Align stack boundary. Only needed if we're calling another function
5208 or using alloca. */
5209 if (!current_function_is_leaf || current_function_calls_alloca
5210 || ix86_current_function_calls_tls_descriptor)
5211 frame->padding2 = ((offset + preferred_alignment - 1)
5212 & -preferred_alignment) - offset;
5213 else
5214 frame->padding2 = 0;
5216 offset += frame->padding2;
5218 /* We've reached end of stack frame. */
5219 frame->stack_pointer_offset = offset;
5221 /* Size prologue needs to allocate. */
5222 frame->to_allocate =
5223 (size + frame->padding1 + frame->padding2
5224 + frame->outgoing_arguments_size + frame->va_arg_size);
5226 if ((!frame->to_allocate && frame->nregs <= 1)
5227 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5228 frame->save_regs_using_mov = false;
5230 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5231 && current_function_is_leaf
5232 && !ix86_current_function_calls_tls_descriptor)
5234 frame->red_zone_size = frame->to_allocate;
5235 if (frame->save_regs_using_mov)
5236 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5237 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5238 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5240 else
5241 frame->red_zone_size = 0;
5242 frame->to_allocate -= frame->red_zone_size;
5243 frame->stack_pointer_offset -= frame->red_zone_size;
5244 #if 0
5245 fprintf (stderr, "nregs: %i\n", frame->nregs);
5246 fprintf (stderr, "size: %i\n", size);
5247 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5248 fprintf (stderr, "padding1: %i\n", frame->padding1);
5249 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5250 fprintf (stderr, "padding2: %i\n", frame->padding2);
5251 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5252 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5253 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5254 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5255 frame->hard_frame_pointer_offset);
5256 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5257 #endif
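/* Worked example (an illustrative sketch, not part of the original code):
   for a 32-bit leaf function with a frame pointer, two call-saved
   registers, 40 bytes of locals and no outgoing arguments, assuming
   UNITS_PER_WORD == 4 and 16-byte stack_alignment_needed, the code above
   computes roughly:

       hard_frame_pointer_offset = 8      (return address + saved %ebp)
       register save area brings offset to 16
       padding1 = 0, so frame_pointer_offset = 16
       locals bring offset to 56; padding2 = 0 for a leaf
       stack_pointer_offset = 56, to_allocate = 40

   The numbers only illustrate how the fields relate to one another; the
   real values depend on the target macros used above.  */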
5260 /* Emit code to save registers in the prologue. */
5262 static void
5263 ix86_emit_save_regs (void)
5265 unsigned int regno;
5266 rtx insn;
5268 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5269 if (ix86_save_reg (regno, true))
5271 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5272 RTX_FRAME_RELATED_P (insn) = 1;
5276 /* Emit code to save registers using MOV insns. The first register
5277 is stored at POINTER + OFFSET. */
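/* For illustration only (an assumed example): with POINTER == %esp,
   OFFSET == 0 and %ebx/%esi to be saved, the loop below emits roughly

       movl %ebx, 0(%esp)
       movl %esi, 4(%esp)

   i.e. one frame-related move per saved register, with OFFSET advancing
   by UNITS_PER_WORD each time.  */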
5278 static void
5279 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5281 unsigned int regno;
5282 rtx insn;
5284 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5285 if (ix86_save_reg (regno, true))
5287 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5288 Pmode, offset),
5289 gen_rtx_REG (Pmode, regno));
5290 RTX_FRAME_RELATED_P (insn) = 1;
5291 offset += UNITS_PER_WORD;
5295 /* Expand prologue or epilogue stack adjustment.
5296 The pattern exists to put a dependency on all ebp-based memory accesses.
5297 STYLE should be negative if instructions should be marked as frame related,
5298 zero if the %r11 register is live and cannot be freely used, and positive
5299 otherwise. */
5301 static void
5302 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5304 rtx insn;
5306 if (! TARGET_64BIT)
5307 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5308 else if (x86_64_immediate_operand (offset, DImode))
5309 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5310 else
5312 rtx r11;
5313 /* r11 is used by indirect sibcall return as well, set before the
5314 epilogue and used after the epilogue. ATM indirect sibcall
5315 shouldn't be used together with huge frame sizes in one
5316 function because of the frame_size check in sibcall.c. */
5317 gcc_assert (style);
5318 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5319 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5320 if (style < 0)
5321 RTX_FRAME_RELATED_P (insn) = 1;
5322 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5323 offset));
5325 if (style < 0)
5326 RTX_FRAME_RELATED_P (insn) = 1;
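/* A hedged example of the 64-bit fallback path above: when OFFSET does not
   fit in a signed 32-bit immediate (say an assumed adjustment of
   -0x100000000), the emitted sequence is roughly

       movabsq $-0x100000000, %r11
       addq    %r11, %rsp

   rather than a single add with an immediate operand.  */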
5329 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5331 static rtx
5332 ix86_internal_arg_pointer (void)
5334 bool has_force_align_arg_pointer =
5335 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5336 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5337 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5338 && DECL_NAME (current_function_decl)
5339 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5340 && DECL_FILE_SCOPE_P (current_function_decl))
5341 || ix86_force_align_arg_pointer
5342 || has_force_align_arg_pointer)
5344 /* Nested functions can't realign the stack due to a register
5345 conflict. */
5346 if (DECL_CONTEXT (current_function_decl)
5347 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5349 if (ix86_force_align_arg_pointer)
5350 warning (0, "-mstackrealign ignored for nested functions");
5351 if (has_force_align_arg_pointer)
5352 error ("%s not supported for nested functions",
5353 ix86_force_align_arg_pointer_string);
5354 return virtual_incoming_args_rtx;
5356 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5357 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5359 else
5360 return virtual_incoming_args_rtx;
5363 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5364 This is called from dwarf2out.c to emit call frame instructions
5365 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5366 static void
5367 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5369 rtx unspec = SET_SRC (pattern);
5370 gcc_assert (GET_CODE (unspec) == UNSPEC);
5372 switch (index)
5374 case UNSPEC_REG_SAVE:
5375 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5376 SET_DEST (pattern));
5377 break;
5378 case UNSPEC_DEF_CFA:
5379 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5380 INTVAL (XVECEXP (unspec, 0, 0)));
5381 break;
5382 default:
5383 gcc_unreachable ();
5387 /* Expand the prologue into a bunch of separate insns. */
5389 void
5390 ix86_expand_prologue (void)
5392 rtx insn;
5393 bool pic_reg_used;
5394 struct ix86_frame frame;
5395 HOST_WIDE_INT allocate;
5397 ix86_compute_frame_layout (&frame);
5399 if (cfun->machine->force_align_arg_pointer)
5401 rtx x, y;
5403 /* Grab the argument pointer. */
5404 x = plus_constant (stack_pointer_rtx, 4);
5405 y = cfun->machine->force_align_arg_pointer;
5406 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5407 RTX_FRAME_RELATED_P (insn) = 1;
5409 /* The unwind info consists of two parts: install the fafp as the cfa,
5410 and record the fafp as the "save register" of the stack pointer.
5411 The latter is there so that the unwinder can see where it
5412 should restore the stack pointer across the and insn. */
5413 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5414 x = gen_rtx_SET (VOIDmode, y, x);
5415 RTX_FRAME_RELATED_P (x) = 1;
5416 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5417 UNSPEC_REG_SAVE);
5418 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5419 RTX_FRAME_RELATED_P (y) = 1;
5420 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5421 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5422 REG_NOTES (insn) = x;
5424 /* Align the stack. */
5425 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5426 GEN_INT (-16)));
5428 /* And here we cheat like madmen with the unwind info. We force the
5429 cfa register back to sp+4, which is exactly what it was at the
5430 start of the function. Re-pushing the return address results in
5431 the return address being at the same spot relative to the cfa, and thus
5432 is correct wrt the unwind info. */
5433 x = cfun->machine->force_align_arg_pointer;
5434 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5435 insn = emit_insn (gen_push (x));
5436 RTX_FRAME_RELATED_P (insn) = 1;
5438 x = GEN_INT (4);
5439 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5440 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5441 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5442 REG_NOTES (insn) = x;
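/* Schematically (an illustrative sketch of the code emitted above, with
   assumed 32-bit register choices), the realigned prologue looks like

       leal  4(%esp), %ecx     # force_align_arg_pointer = old %esp + 4
       andl  $-16, %esp        # align the stack
       pushl -4(%ecx)          # re-push the return address

   so that the unwind information can keep describing the CFA in terms of
   force_align_arg_pointer while the real %esp is realigned.  */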
5445 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5446 slower on all targets. Also sdb doesn't like it. */
5448 if (frame_pointer_needed)
5450 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5451 RTX_FRAME_RELATED_P (insn) = 1;
5453 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5454 RTX_FRAME_RELATED_P (insn) = 1;
5457 allocate = frame.to_allocate;
5459 if (!frame.save_regs_using_mov)
5460 ix86_emit_save_regs ();
5461 else
5462 allocate += frame.nregs * UNITS_PER_WORD;
5464 /* When using the red zone we may start saving registers before allocating
5465 the stack frame, saving one cycle of the prologue. */
5466 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5467 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5468 : stack_pointer_rtx,
5469 -frame.nregs * UNITS_PER_WORD);
5471 if (allocate == 0)
5473 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5474 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5475 GEN_INT (-allocate), -1);
5476 else
5478 /* Only valid for Win32. */
5479 rtx eax = gen_rtx_REG (SImode, 0);
5480 bool eax_live = ix86_eax_live_at_start_p ();
5481 rtx t;
5483 gcc_assert (!TARGET_64BIT);
5485 if (eax_live)
5487 emit_insn (gen_push (eax));
5488 allocate -= 4;
5491 emit_move_insn (eax, GEN_INT (allocate));
5493 insn = emit_insn (gen_allocate_stack_worker (eax));
5494 RTX_FRAME_RELATED_P (insn) = 1;
5495 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5496 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5497 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5498 t, REG_NOTES (insn));
5500 if (eax_live)
5502 if (frame_pointer_needed)
5503 t = plus_constant (hard_frame_pointer_rtx,
5504 allocate
5505 - frame.to_allocate
5506 - frame.nregs * UNITS_PER_WORD);
5507 else
5508 t = plus_constant (stack_pointer_rtx, allocate);
5509 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
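/* A rough sketch of the probed allocation above (register names assumed;
   the helper symbol is whatever gen_allocate_stack_worker expands to):

       movl  $<allocate>, %eax
       call  <stack probe helper>    # touches guard pages, adjusts %esp

   If %eax was live at function entry it is pushed first and reloaded from
   the new frame afterwards, as the code above shows.  */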
5513 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5515 if (!frame_pointer_needed || !frame.to_allocate)
5516 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5517 else
5518 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5519 -frame.nregs * UNITS_PER_WORD);
5522 pic_reg_used = false;
5523 if (pic_offset_table_rtx
5524 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5525 || current_function_profile))
5527 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5529 if (alt_pic_reg_used != INVALID_REGNUM)
5530 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5532 pic_reg_used = true;
5535 if (pic_reg_used)
5537 if (TARGET_64BIT)
5538 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5539 else
5540 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5542 /* Even with accurate pre-reload life analysis, we can wind up
5543 deleting all references to the pic register after reload.
5544 Consider if cross-jumping unifies two sides of a branch
5545 controlled by a comparison vs the only read from a global.
5546 In which case, allow the set_got to be deleted, though we're
5547 too late to do anything about the ebx save in the prologue. */
5548 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5551 /* Prevent function calls from being scheduled before the call to mcount.
5552 In the pic_reg_used case, make sure that the got load isn't deleted. */
5553 if (current_function_profile)
5554 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5557 /* Emit code to restore saved registers using MOV insns. First register
5558 is restored from POINTER + OFFSET. */
5559 static void
5560 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5561 int maybe_eh_return)
5563 int regno;
5564 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5566 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5567 if (ix86_save_reg (regno, maybe_eh_return))
5569 /* Ensure that adjust_address won't be forced to produce a pointer
5570 out of the range allowed by the x86-64 instruction set. */
5571 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5573 rtx r11;
5575 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5576 emit_move_insn (r11, GEN_INT (offset));
5577 emit_insn (gen_adddi3 (r11, r11, pointer));
5578 base_address = gen_rtx_MEM (Pmode, r11);
5579 offset = 0;
5581 emit_move_insn (gen_rtx_REG (Pmode, regno),
5582 adjust_address (base_address, Pmode, offset));
5583 offset += UNITS_PER_WORD;
5587 /* Restore function stack, frame, and registers. */
5589 void
5590 ix86_expand_epilogue (int style)
5592 int regno;
5593 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5594 struct ix86_frame frame;
5595 HOST_WIDE_INT offset;
5597 ix86_compute_frame_layout (&frame);
5599 /* Calculate start of saved registers relative to ebp. Special care
5600 must be taken for the normal return case of a function using
5601 eh_return: the eax and edx registers are marked as saved, but not
5602 restored along this path. */
5603 offset = frame.nregs;
5604 if (current_function_calls_eh_return && style != 2)
5605 offset -= 2;
5606 offset *= -UNITS_PER_WORD;
5608 /* If we're only restoring one register and sp is not valid, then
5609 use a move instruction to restore the register, since it's
5610 less work than reloading sp and popping the register.
5612 The default code results in a stack adjustment using an add/lea instruction,
5613 while this code results in a LEAVE instruction (or discrete equivalent),
5614 so it is profitable in some other cases as well, especially when there
5615 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5616 and there is exactly one register to pop. This heuristic may need some
5617 tuning in the future. */
5618 if ((!sp_valid && frame.nregs <= 1)
5619 || (TARGET_EPILOGUE_USING_MOVE
5620 && cfun->machine->use_fast_prologue_epilogue
5621 && (frame.nregs > 1 || frame.to_allocate))
5622 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5623 || (frame_pointer_needed && TARGET_USE_LEAVE
5624 && cfun->machine->use_fast_prologue_epilogue
5625 && frame.nregs == 1)
5626 || current_function_calls_eh_return)
5628 /* Restore registers. We can use ebp or esp to address the memory
5629 locations. If both are available, default to ebp, since offsets
5630 are known to be small. The only exception is when esp points directly
5631 to the end of the block of saved registers, where we may simplify the
5632 addressing mode. */
5634 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5635 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5636 frame.to_allocate, style == 2);
5637 else
5638 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5639 offset, style == 2);
5641 /* eh_return epilogues need %ecx added to the stack pointer. */
5642 if (style == 2)
5644 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5646 if (frame_pointer_needed)
5648 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5649 tmp = plus_constant (tmp, UNITS_PER_WORD);
5650 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5652 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5653 emit_move_insn (hard_frame_pointer_rtx, tmp);
5655 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5656 const0_rtx, style);
5658 else
5660 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5661 tmp = plus_constant (tmp, (frame.to_allocate
5662 + frame.nregs * UNITS_PER_WORD));
5663 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5666 else if (!frame_pointer_needed)
5667 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5668 GEN_INT (frame.to_allocate
5669 + frame.nregs * UNITS_PER_WORD),
5670 style);
5671 /* If not an i386, mov & pop is faster than "leave". */
5672 else if (TARGET_USE_LEAVE || optimize_size
5673 || !cfun->machine->use_fast_prologue_epilogue)
5674 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5675 else
5677 pro_epilogue_adjust_stack (stack_pointer_rtx,
5678 hard_frame_pointer_rtx,
5679 const0_rtx, style);
5680 if (TARGET_64BIT)
5681 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5682 else
5683 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5686 else
5688 /* First step is to deallocate the stack frame so that we can
5689 pop the registers. */
5690 if (!sp_valid)
5692 gcc_assert (frame_pointer_needed);
5693 pro_epilogue_adjust_stack (stack_pointer_rtx,
5694 hard_frame_pointer_rtx,
5695 GEN_INT (offset), style);
5697 else if (frame.to_allocate)
5698 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5699 GEN_INT (frame.to_allocate), style);
5701 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5702 if (ix86_save_reg (regno, false))
5704 if (TARGET_64BIT)
5705 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5706 else
5707 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5709 if (frame_pointer_needed)
5711 /* Leave results in shorter dependency chains on CPUs that are
5712 able to grok it fast. */
5713 if (TARGET_USE_LEAVE)
5714 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5715 else if (TARGET_64BIT)
5716 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5717 else
5718 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5722 if (cfun->machine->force_align_arg_pointer)
5724 emit_insn (gen_addsi3 (stack_pointer_rtx,
5725 cfun->machine->force_align_arg_pointer,
5726 GEN_INT (-4)));
5729 /* Sibcall epilogues don't want a return instruction. */
5730 if (style == 0)
5731 return;
5733 if (current_function_pops_args && current_function_args_size)
5735 rtx popc = GEN_INT (current_function_pops_args);
5737 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
5738 return address, do an explicit add, and jump indirectly to the
5739 caller. */
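/* A hedged sketch of the large-popc case handled below (assumed 32-bit
   callee-pops calling convention): instead of "ret $N", whose immediate
   is only 16 bits, the epilogue becomes roughly

       popl %ecx               # return address
       addl $<N>, %esp         # pop the arguments explicitly
       jmp  *%ecx              # return to the caller

   as emitted by the gen_popsi1 / gen_addsi3 / gen_return_indirect_internal
   calls that follow.  */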
5741 if (current_function_pops_args >= 65536)
5743 rtx ecx = gen_rtx_REG (SImode, 2);
5745 /* There is no "pascal" calling convention in the 64bit ABI. */
5746 gcc_assert (!TARGET_64BIT);
5748 emit_insn (gen_popsi1 (ecx));
5749 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5750 emit_jump_insn (gen_return_indirect_internal (ecx));
5752 else
5753 emit_jump_insn (gen_return_pop_internal (popc));
5755 else
5756 emit_jump_insn (gen_return_internal ());
5759 /* Reset from the function's potential modifications. */
5761 static void
5762 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5763 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5765 if (pic_offset_table_rtx)
5766 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5767 #if TARGET_MACHO
5768 /* Mach-O doesn't support labels at the end of objects, so if
5769 it looks like we might want one, insert a NOP. */
5771 rtx insn = get_last_insn ();
5772 while (insn
5773 && NOTE_P (insn)
5774 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5775 insn = PREV_INSN (insn);
5776 if (insn
5777 && (LABEL_P (insn)
5778 || (NOTE_P (insn)
5779 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5780 fputs ("\tnop\n", file);
5782 #endif
5786 /* Extract the parts of an RTL expression that is a valid memory address
5787 for an instruction. Return 0 if the structure of the address is
5788 grossly off. Return -1 if the address contains ASHIFT, so it is not
5789 strictly valid, but is still used for computing the length of an lea instruction. */
5792 ix86_decompose_address (rtx addr, struct ix86_address *out)
5794 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5795 rtx base_reg, index_reg;
5796 HOST_WIDE_INT scale = 1;
5797 rtx scale_rtx = NULL_RTX;
5798 int retval = 1;
5799 enum ix86_address_seg seg = SEG_DEFAULT;
5801 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5802 base = addr;
5803 else if (GET_CODE (addr) == PLUS)
5805 rtx addends[4], op;
5806 int n = 0, i;
5808 op = addr;
5811 if (n >= 4)
5812 return 0;
5813 addends[n++] = XEXP (op, 1);
5814 op = XEXP (op, 0);
5816 while (GET_CODE (op) == PLUS);
5817 if (n >= 4)
5818 return 0;
5819 addends[n] = op;
5821 for (i = n; i >= 0; --i)
5823 op = addends[i];
5824 switch (GET_CODE (op))
5826 case MULT:
5827 if (index)
5828 return 0;
5829 index = XEXP (op, 0);
5830 scale_rtx = XEXP (op, 1);
5831 break;
5833 case UNSPEC:
5834 if (XINT (op, 1) == UNSPEC_TP
5835 && TARGET_TLS_DIRECT_SEG_REFS
5836 && seg == SEG_DEFAULT)
5837 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5838 else
5839 return 0;
5840 break;
5842 case REG:
5843 case SUBREG:
5844 if (!base)
5845 base = op;
5846 else if (!index)
5847 index = op;
5848 else
5849 return 0;
5850 break;
5852 case CONST:
5853 case CONST_INT:
5854 case SYMBOL_REF:
5855 case LABEL_REF:
5856 if (disp)
5857 return 0;
5858 disp = op;
5859 break;
5861 default:
5862 return 0;
5866 else if (GET_CODE (addr) == MULT)
5868 index = XEXP (addr, 0); /* index*scale */
5869 scale_rtx = XEXP (addr, 1);
5871 else if (GET_CODE (addr) == ASHIFT)
5873 rtx tmp;
5875 /* We're called for lea too, which implements ashift on occasion. */
5876 index = XEXP (addr, 0);
5877 tmp = XEXP (addr, 1);
5878 if (GET_CODE (tmp) != CONST_INT)
5879 return 0;
5880 scale = INTVAL (tmp);
5881 if ((unsigned HOST_WIDE_INT) scale > 3)
5882 return 0;
5883 scale = 1 << scale;
5884 retval = -1;
5886 else
5887 disp = addr; /* displacement */
5889 /* Extract the integral value of scale. */
5890 if (scale_rtx)
5892 if (GET_CODE (scale_rtx) != CONST_INT)
5893 return 0;
5894 scale = INTVAL (scale_rtx);
5897 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5898 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5900 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5901 if (base_reg && index_reg && scale == 1
5902 && (index_reg == arg_pointer_rtx
5903 || index_reg == frame_pointer_rtx
5904 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5906 rtx tmp;
5907 tmp = base, base = index, index = tmp;
5908 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5911 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5912 if ((base_reg == hard_frame_pointer_rtx
5913 || base_reg == frame_pointer_rtx
5914 || base_reg == arg_pointer_rtx) && !disp)
5915 disp = const0_rtx;
5917 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5918 Avoid this by transforming it to [%esi+0]. */
5919 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5920 && base_reg && !index_reg && !disp
5921 && REG_P (base_reg)
5922 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5923 disp = const0_rtx;
5925 /* Special case: encode reg+reg instead of reg*2. */
5926 if (!base && index && scale && scale == 2)
5927 base = index, base_reg = index_reg, scale = 1;
5929 /* Special case: scaling cannot be encoded without base or displacement. */
5930 if (!base && !disp && index && scale != 1)
5931 disp = const0_rtx;
5933 out->base = base;
5934 out->index = index;
5935 out->disp = disp;
5936 out->scale = scale;
5937 out->seg = seg;
5939 return retval;
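/* Worked example (illustrative only): for the RTL address
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
   the decomposition above yields base = %ebx, index = %eax, scale = 4,
   disp = 12 and seg = SEG_DEFAULT, i.e. the operand 12(%ebx,%eax,4).
   An ASHIFT form such as (ashift (reg) (const_int 2)) used as the whole
   address is accepted as index*4 but makes the function return -1
   instead of 1.  */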
5942 /* Return the cost of the memory address x.
5943 For i386, it is better to use a complex address than to let gcc copy
5944 the address into a reg and make a new pseudo. But not if the address
5945 requires two regs - that would mean more pseudos with longer
5946 lifetimes. */
5947 static int
5948 ix86_address_cost (rtx x)
5950 struct ix86_address parts;
5951 int cost = 1;
5952 int ok = ix86_decompose_address (x, &parts);
5954 gcc_assert (ok);
5956 if (parts.base && GET_CODE (parts.base) == SUBREG)
5957 parts.base = SUBREG_REG (parts.base);
5958 if (parts.index && GET_CODE (parts.index) == SUBREG)
5959 parts.index = SUBREG_REG (parts.index);
5961 /* More complex memory references are better. */
5962 if (parts.disp && parts.disp != const0_rtx)
5963 cost--;
5964 if (parts.seg != SEG_DEFAULT)
5965 cost--;
5967 /* Attempt to minimize number of registers in the address. */
5968 if ((parts.base
5969 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5970 || (parts.index
5971 && (!REG_P (parts.index)
5972 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5973 cost++;
5975 if (parts.base
5976 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5977 && parts.index
5978 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5979 && parts.base != parts.index)
5980 cost++;
5982 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5983 since its predecode logic can't detect the length of such instructions
5984 and they degenerate to vector decoding. Increase the cost of such
5985 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5986 to split such addresses or even refuse such addresses at all.
5988 The following addressing modes are affected:
5989 [base+scale*index]
5990 [scale*index+disp]
5991 [base+index]
5993 The first and last cases may be avoidable by explicitly coding the zero in
5994 the memory address, but I don't have an AMD-K6 machine handy to check this
5995 theory. */
5997 if (TARGET_K6
5998 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5999 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6000 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6001 cost += 10;
6003 return cost;
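/* Illustrative walk-through (not from the original sources): starting from
   cost = 1, an address like 12(%ebx) drops to 0 because a nonzero
   displacement is preferred, a plain (%ebx) stays at 1, and an address
   whose base and index are still distinct pseudo registers pays +1 twice,
   reflecting the extra register pressure described above.  On K6 the
   affected ModR/M forms add a further penalty of 10.  */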
6006 /* If X is a machine specific address (i.e. a symbol or label being
6007 referenced as a displacement from the GOT implemented using an
6008 UNSPEC), then return the base term. Otherwise return X. */
6011 ix86_find_base_term (rtx x)
6013 rtx term;
6015 if (TARGET_64BIT)
6017 if (GET_CODE (x) != CONST)
6018 return x;
6019 term = XEXP (x, 0);
6020 if (GET_CODE (term) == PLUS
6021 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6022 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6023 term = XEXP (term, 0);
6024 if (GET_CODE (term) != UNSPEC
6025 || XINT (term, 1) != UNSPEC_GOTPCREL)
6026 return x;
6028 term = XVECEXP (term, 0, 0);
6030 if (GET_CODE (term) != SYMBOL_REF
6031 && GET_CODE (term) != LABEL_REF)
6032 return x;
6034 return term;
6037 term = ix86_delegitimize_address (x);
6039 if (GET_CODE (term) != SYMBOL_REF
6040 && GET_CODE (term) != LABEL_REF)
6041 return x;
6043 return term;
6046 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6047 this is used to form addresses to local data when -fPIC is in
6048 use. */
6050 static bool
6051 darwin_local_data_pic (rtx disp)
6053 if (GET_CODE (disp) == MINUS)
6055 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6056 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6057 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6059 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6060 if (! strcmp (sym_name, "<pic base>"))
6061 return true;
6065 return false;
6068 /* Determine if a given RTX is a valid constant. We already know this
6069 satisfies CONSTANT_P. */
6071 bool
6072 legitimate_constant_p (rtx x)
6074 switch (GET_CODE (x))
6076 case CONST:
6077 x = XEXP (x, 0);
6079 if (GET_CODE (x) == PLUS)
6081 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6082 return false;
6083 x = XEXP (x, 0);
6086 if (TARGET_MACHO && darwin_local_data_pic (x))
6087 return true;
6089 /* Only some unspecs are valid as "constants". */
6090 if (GET_CODE (x) == UNSPEC)
6091 switch (XINT (x, 1))
6093 case UNSPEC_GOTOFF:
6094 return TARGET_64BIT;
6095 case UNSPEC_TPOFF:
6096 case UNSPEC_NTPOFF:
6097 x = XVECEXP (x, 0, 0);
6098 return (GET_CODE (x) == SYMBOL_REF
6099 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6100 case UNSPEC_DTPOFF:
6101 x = XVECEXP (x, 0, 0);
6102 return (GET_CODE (x) == SYMBOL_REF
6103 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6104 default:
6105 return false;
6108 /* We must have drilled down to a symbol. */
6109 if (GET_CODE (x) == LABEL_REF)
6110 return true;
6111 if (GET_CODE (x) != SYMBOL_REF)
6112 return false;
6113 /* FALLTHRU */
6115 case SYMBOL_REF:
6116 /* TLS symbols are never valid. */
6117 if (SYMBOL_REF_TLS_MODEL (x))
6118 return false;
6119 break;
6121 case CONST_DOUBLE:
6122 if (GET_MODE (x) == TImode
6123 && x != CONST0_RTX (TImode)
6124 && !TARGET_64BIT)
6125 return false;
6126 break;
6128 case CONST_VECTOR:
6129 if (x == CONST0_RTX (GET_MODE (x)))
6130 return true;
6131 return false;
6133 default:
6134 break;
6137 /* Otherwise we handle everything else in the move patterns. */
6138 return true;
6141 /* Determine if it's legal to put X into the constant pool. This
6142 is not possible for the address of thread-local symbols, which
6143 is checked above. */
6145 static bool
6146 ix86_cannot_force_const_mem (rtx x)
6148 /* We can always put integral constants and vectors in memory. */
6149 switch (GET_CODE (x))
6151 case CONST_INT:
6152 case CONST_DOUBLE:
6153 case CONST_VECTOR:
6154 return false;
6156 default:
6157 break;
6159 return !legitimate_constant_p (x);
6162 /* Determine if a given RTX is a valid constant address. */
6164 bool
6165 constant_address_p (rtx x)
6167 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6170 /* Nonzero if the constant value X is a legitimate general operand
6171 when generating PIC code. It is given that flag_pic is on and
6172 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6174 bool
6175 legitimate_pic_operand_p (rtx x)
6177 rtx inner;
6179 switch (GET_CODE (x))
6181 case CONST:
6182 inner = XEXP (x, 0);
6183 if (GET_CODE (inner) == PLUS
6184 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6185 inner = XEXP (inner, 0);
6187 /* Only some unspecs are valid as "constants". */
6188 if (GET_CODE (inner) == UNSPEC)
6189 switch (XINT (inner, 1))
6191 case UNSPEC_GOTOFF:
6192 return TARGET_64BIT;
6193 case UNSPEC_TPOFF:
6194 x = XVECEXP (inner, 0, 0);
6195 return (GET_CODE (x) == SYMBOL_REF
6196 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6197 default:
6198 return false;
6200 /* FALLTHRU */
6202 case SYMBOL_REF:
6203 case LABEL_REF:
6204 return legitimate_pic_address_disp_p (x);
6206 default:
6207 return true;
6211 /* Determine if a given CONST RTX is a valid memory displacement
6212 in PIC mode. */
6215 legitimate_pic_address_disp_p (rtx disp)
6217 bool saw_plus;
6219 /* In 64bit mode we can allow direct addresses of symbols and labels
6220 when they are not dynamic symbols. */
6221 if (TARGET_64BIT)
6223 rtx op0 = disp, op1;
6225 switch (GET_CODE (disp))
6227 case LABEL_REF:
6228 return true;
6230 case CONST:
6231 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6232 break;
6233 op0 = XEXP (XEXP (disp, 0), 0);
6234 op1 = XEXP (XEXP (disp, 0), 1);
6235 if (GET_CODE (op1) != CONST_INT
6236 || INTVAL (op1) >= 16*1024*1024
6237 || INTVAL (op1) < -16*1024*1024)
6238 break;
6239 if (GET_CODE (op0) == LABEL_REF)
6240 return true;
6241 if (GET_CODE (op0) != SYMBOL_REF)
6242 break;
6243 /* FALLTHRU */
6245 case SYMBOL_REF:
6246 /* TLS references should always be enclosed in UNSPEC. */
6247 if (SYMBOL_REF_TLS_MODEL (op0))
6248 return false;
6249 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6250 return true;
6251 break;
6253 default:
6254 break;
6257 if (GET_CODE (disp) != CONST)
6258 return 0;
6259 disp = XEXP (disp, 0);
6261 if (TARGET_64BIT)
6263 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
6264 of GOT tables. We should not need these anyway. */
6265 if (GET_CODE (disp) != UNSPEC
6266 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6267 && XINT (disp, 1) != UNSPEC_GOTOFF))
6268 return 0;
6270 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6271 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6272 return 0;
6273 return 1;
6276 saw_plus = false;
6277 if (GET_CODE (disp) == PLUS)
6279 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6280 return 0;
6281 disp = XEXP (disp, 0);
6282 saw_plus = true;
6285 if (TARGET_MACHO && darwin_local_data_pic (disp))
6286 return 1;
6288 if (GET_CODE (disp) != UNSPEC)
6289 return 0;
6291 switch (XINT (disp, 1))
6293 case UNSPEC_GOT:
6294 if (saw_plus)
6295 return false;
6296 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6297 case UNSPEC_GOTOFF:
6298 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6299 While the ABI also specifies a 32bit relocation, we don't produce it in
6300 the small PIC model at all. */
6301 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6302 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6303 && !TARGET_64BIT)
6304 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6305 return false;
6306 case UNSPEC_GOTTPOFF:
6307 case UNSPEC_GOTNTPOFF:
6308 case UNSPEC_INDNTPOFF:
6309 if (saw_plus)
6310 return false;
6311 disp = XVECEXP (disp, 0, 0);
6312 return (GET_CODE (disp) == SYMBOL_REF
6313 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6314 case UNSPEC_NTPOFF:
6315 disp = XVECEXP (disp, 0, 0);
6316 return (GET_CODE (disp) == SYMBOL_REF
6317 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6318 case UNSPEC_DTPOFF:
6319 disp = XVECEXP (disp, 0, 0);
6320 return (GET_CODE (disp) == SYMBOL_REF
6321 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6324 return 0;
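/* Hedged examples of displacements this predicate accepts (32-bit unless
   noted): (const (unspec [foo] UNSPEC_GOTOFF)) for local symbols,
   (const (unspec [foo] UNSPEC_GOT)) for GOT slots, and the TLS forms
   checked in the switch above.  In 64-bit mode, a plain local SYMBOL_REF
   or LABEL_REF (optionally plus a small constant) is also allowed, as
   handled at the top of the function.  */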
6327 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6328 memory address for an instruction. The MODE argument is the machine mode
6329 for the MEM expression that wants to use this address.
6331 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6332 convert common non-canonical forms to canonical form so that they will
6333 be recognized. */
6336 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6338 struct ix86_address parts;
6339 rtx base, index, disp;
6340 HOST_WIDE_INT scale;
6341 const char *reason = NULL;
6342 rtx reason_rtx = NULL_RTX;
6344 if (TARGET_DEBUG_ADDR)
6346 fprintf (stderr,
6347 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6348 GET_MODE_NAME (mode), strict);
6349 debug_rtx (addr);
6352 if (ix86_decompose_address (addr, &parts) <= 0)
6354 reason = "decomposition failed";
6355 goto report_error;
6358 base = parts.base;
6359 index = parts.index;
6360 disp = parts.disp;
6361 scale = parts.scale;
6363 /* Validate base register.
6365 Don't allow SUBREG's that span more than a word here. It can lead to spill
6366 failures when the base is one word out of a two word structure, which is
6367 represented internally as a DImode int. */
6369 if (base)
6371 rtx reg;
6372 reason_rtx = base;
6374 if (REG_P (base))
6375 reg = base;
6376 else if (GET_CODE (base) == SUBREG
6377 && REG_P (SUBREG_REG (base))
6378 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6379 <= UNITS_PER_WORD)
6380 reg = SUBREG_REG (base);
6381 else
6383 reason = "base is not a register";
6384 goto report_error;
6387 if (GET_MODE (base) != Pmode)
6389 reason = "base is not in Pmode";
6390 goto report_error;
6393 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6394 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6396 reason = "base is not valid";
6397 goto report_error;
6401 /* Validate index register.
6403 Don't allow SUBREG's that span more than a word here -- same as above. */
6405 if (index)
6407 rtx reg;
6408 reason_rtx = index;
6410 if (REG_P (index))
6411 reg = index;
6412 else if (GET_CODE (index) == SUBREG
6413 && REG_P (SUBREG_REG (index))
6414 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6415 <= UNITS_PER_WORD)
6416 reg = SUBREG_REG (index);
6417 else
6419 reason = "index is not a register";
6420 goto report_error;
6423 if (GET_MODE (index) != Pmode)
6425 reason = "index is not in Pmode";
6426 goto report_error;
6429 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6430 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6432 reason = "index is not valid";
6433 goto report_error;
6437 /* Validate scale factor. */
6438 if (scale != 1)
6440 reason_rtx = GEN_INT (scale);
6441 if (!index)
6443 reason = "scale without index";
6444 goto report_error;
6447 if (scale != 2 && scale != 4 && scale != 8)
6449 reason = "scale is not a valid multiplier";
6450 goto report_error;
6454 /* Validate displacement. */
6455 if (disp)
6457 reason_rtx = disp;
6459 if (GET_CODE (disp) == CONST
6460 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6461 switch (XINT (XEXP (disp, 0), 1))
6463 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6464 used. While the ABI also specifies 32bit relocations, we don't produce
6465 them at all and use IP-relative addressing instead. */
6466 case UNSPEC_GOT:
6467 case UNSPEC_GOTOFF:
6468 gcc_assert (flag_pic);
6469 if (!TARGET_64BIT)
6470 goto is_legitimate_pic;
6471 reason = "64bit address unspec";
6472 goto report_error;
6474 case UNSPEC_GOTPCREL:
6475 gcc_assert (flag_pic);
6476 goto is_legitimate_pic;
6478 case UNSPEC_GOTTPOFF:
6479 case UNSPEC_GOTNTPOFF:
6480 case UNSPEC_INDNTPOFF:
6481 case UNSPEC_NTPOFF:
6482 case UNSPEC_DTPOFF:
6483 break;
6485 default:
6486 reason = "invalid address unspec";
6487 goto report_error;
6490 else if (SYMBOLIC_CONST (disp)
6491 && (flag_pic
6492 || (TARGET_MACHO
6493 #if TARGET_MACHO
6494 && MACHOPIC_INDIRECT
6495 && !machopic_operand_p (disp)
6496 #endif
6500 is_legitimate_pic:
6501 if (TARGET_64BIT && (index || base))
6503 /* foo@dtpoff(%rX) is ok. */
6504 if (GET_CODE (disp) != CONST
6505 || GET_CODE (XEXP (disp, 0)) != PLUS
6506 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6507 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6508 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6509 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6511 reason = "non-constant pic memory reference";
6512 goto report_error;
6515 else if (! legitimate_pic_address_disp_p (disp))
6517 reason = "displacement is an invalid pic construct";
6518 goto report_error;
6521 /* This code used to verify that a symbolic pic displacement
6522 includes the pic_offset_table_rtx register.
6524 While this is a good idea, unfortunately these constructs may
6525 be created by the "adds using lea" optimization for incorrect
6526 code like:
6528 int a;
6529 int foo(int i)
6531 return *(&a+i);
6534 This code is nonsensical, but results in addressing the
6535 GOT table with a pic_offset_table_rtx base. We can't
6536 just refuse it easily, since it gets matched by the
6537 "addsi3" pattern, which later gets split to lea in case the
6538 output register differs from the input. While this
6539 could be handled by a separate addsi pattern for this case
6540 that never results in lea, disabling this test seems to be the
6541 easier and correct fix for the crash. */
6543 else if (GET_CODE (disp) != LABEL_REF
6544 && GET_CODE (disp) != CONST_INT
6545 && (GET_CODE (disp) != CONST
6546 || !legitimate_constant_p (disp))
6547 && (GET_CODE (disp) != SYMBOL_REF
6548 || !legitimate_constant_p (disp)))
6550 reason = "displacement is not constant";
6551 goto report_error;
6553 else if (TARGET_64BIT
6554 && !x86_64_immediate_operand (disp, VOIDmode))
6556 reason = "displacement is out of range";
6557 goto report_error;
6561 /* Everything looks valid. */
6562 if (TARGET_DEBUG_ADDR)
6563 fprintf (stderr, "Success.\n");
6564 return TRUE;
6566 report_error:
6567 if (TARGET_DEBUG_ADDR)
6569 fprintf (stderr, "Error: %s\n", reason);
6570 debug_rtx (reason_rtx);
6572 return FALSE;
6575 /* Return a unique alias set for the GOT. */
6577 static HOST_WIDE_INT
6578 ix86_GOT_alias_set (void)
6580 static HOST_WIDE_INT set = -1;
6581 if (set == -1)
6582 set = new_alias_set ();
6583 return set;
6586 /* Return a legitimate reference for ORIG (an address) using the
6587 register REG. If REG is 0, a new pseudo is generated.
6589 There are two types of references that must be handled:
6591 1. Global data references must load the address from the GOT, via
6592 the PIC reg. An insn is emitted to do this load, and the reg is
6593 returned.
6595 2. Static data references, constant pool addresses, and code labels
6596 compute the address as an offset from the GOT, whose base is in
6597 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6598 differentiate them from global data objects. The returned
6599 address is the PIC reg + an unspec constant.
6601 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6602 reg also appears in the address. */
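/* For illustration (a hedged sketch of the two cases described above,
   32-bit code and the small PIC model assumed):

     global data:  movl  foo@GOT(%ebx), %reg     # load the address from the GOT
     local data:   leal  bar@GOTOFF(%ebx), %reg  # PIC reg + constant offset

   where %ebx stands for pic_offset_table_rtx.  The code below builds the
   corresponding UNSPEC_GOT / UNSPEC_GOTOFF RTL.  */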
6604 static rtx
6605 legitimize_pic_address (rtx orig, rtx reg)
6607 rtx addr = orig;
6608 rtx new = orig;
6609 rtx base;
6611 #if TARGET_MACHO
6612 if (TARGET_MACHO && !TARGET_64BIT)
6614 if (reg == 0)
6615 reg = gen_reg_rtx (Pmode);
6616 /* Use the generic Mach-O PIC machinery. */
6617 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6619 #endif
6621 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6622 new = addr;
6623 else if (TARGET_64BIT
6624 && ix86_cmodel != CM_SMALL_PIC
6625 && local_symbolic_operand (addr, Pmode))
6627 rtx tmpreg;
6628 /* This symbol may be referenced via a displacement from the PIC
6629 base address (@GOTOFF). */
6631 if (reload_in_progress)
6632 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6633 if (GET_CODE (addr) == CONST)
6634 addr = XEXP (addr, 0);
6635 if (GET_CODE (addr) == PLUS)
6637 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6638 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6640 else
6641 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6642 new = gen_rtx_CONST (Pmode, new);
6643 if (!reg)
6644 tmpreg = gen_reg_rtx (Pmode);
6645 else
6646 tmpreg = reg;
6647 emit_move_insn (tmpreg, new);
6649 if (reg != 0)
6651 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6652 tmpreg, 1, OPTAB_DIRECT);
6653 new = reg;
6655 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6657 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6659 /* This symbol may be referenced via a displacement from the PIC
6660 base address (@GOTOFF). */
6662 if (reload_in_progress)
6663 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6664 if (GET_CODE (addr) == CONST)
6665 addr = XEXP (addr, 0);
6666 if (GET_CODE (addr) == PLUS)
6668 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6669 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6671 else
6672 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6673 new = gen_rtx_CONST (Pmode, new);
6674 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6676 if (reg != 0)
6678 emit_move_insn (reg, new);
6679 new = reg;
6682 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6684 if (TARGET_64BIT)
6686 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6687 new = gen_rtx_CONST (Pmode, new);
6688 new = gen_const_mem (Pmode, new);
6689 set_mem_alias_set (new, ix86_GOT_alias_set ());
6691 if (reg == 0)
6692 reg = gen_reg_rtx (Pmode);
6693 /* Use gen_movsi directly, otherwise the address is loaded
6694 into a register for CSE. We don't want to CSE these addresses;
6695 instead we CSE addresses from the GOT table, so skip this. */
6696 emit_insn (gen_movsi (reg, new));
6697 new = reg;
6699 else
6701 /* This symbol must be referenced via a load from the
6702 Global Offset Table (@GOT). */
6704 if (reload_in_progress)
6705 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6706 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6707 new = gen_rtx_CONST (Pmode, new);
6708 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6709 new = gen_const_mem (Pmode, new);
6710 set_mem_alias_set (new, ix86_GOT_alias_set ());
6712 if (reg == 0)
6713 reg = gen_reg_rtx (Pmode);
6714 emit_move_insn (reg, new);
6715 new = reg;
6718 else
6720 if (GET_CODE (addr) == CONST_INT
6721 && !x86_64_immediate_operand (addr, VOIDmode))
6723 if (reg)
6725 emit_move_insn (reg, addr);
6726 new = reg;
6728 else
6729 new = force_reg (Pmode, addr);
6731 else if (GET_CODE (addr) == CONST)
6733 addr = XEXP (addr, 0);
6735 /* We must match stuff we generated before. Assume the only
6736 unspecs that can get here are ours. Not that we could do
6737 anything with them anyway.... */
6738 if (GET_CODE (addr) == UNSPEC
6739 || (GET_CODE (addr) == PLUS
6740 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6741 return orig;
6742 gcc_assert (GET_CODE (addr) == PLUS);
6744 if (GET_CODE (addr) == PLUS)
6746 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6748 /* Check first to see if this is a constant offset from a @GOTOFF
6749 symbol reference. */
6750 if (local_symbolic_operand (op0, Pmode)
6751 && GET_CODE (op1) == CONST_INT)
6753 if (!TARGET_64BIT)
6755 if (reload_in_progress)
6756 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6757 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6758 UNSPEC_GOTOFF);
6759 new = gen_rtx_PLUS (Pmode, new, op1);
6760 new = gen_rtx_CONST (Pmode, new);
6761 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6763 if (reg != 0)
6765 emit_move_insn (reg, new);
6766 new = reg;
6769 else
6771 if (INTVAL (op1) < -16*1024*1024
6772 || INTVAL (op1) >= 16*1024*1024)
6774 if (!x86_64_immediate_operand (op1, Pmode))
6775 op1 = force_reg (Pmode, op1);
6776 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6780 else
6782 base = legitimize_pic_address (XEXP (addr, 0), reg);
6783 new = legitimize_pic_address (XEXP (addr, 1),
6784 base == reg ? NULL_RTX : reg);
6786 if (GET_CODE (new) == CONST_INT)
6787 new = plus_constant (base, INTVAL (new));
6788 else
6790 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6792 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6793 new = XEXP (new, 1);
6795 new = gen_rtx_PLUS (Pmode, base, new);
6800 return new;
6803 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6805 static rtx
6806 get_thread_pointer (int to_reg)
6808 rtx tp, reg, insn;
6810 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6811 if (!to_reg)
6812 return tp;
6814 reg = gen_reg_rtx (Pmode);
6815 insn = gen_rtx_SET (VOIDmode, reg, tp);
6816 insn = emit_insn (insn);
6818 return reg;
6821 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6822 false if we expect this to be used for a memory address and true if
6823 we expect to load the address into a register. */
6825 static rtx
6826 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6828 rtx dest, base, off, pic, tp;
6829 int type;
6831 switch (model)
6833 case TLS_MODEL_GLOBAL_DYNAMIC:
6834 dest = gen_reg_rtx (Pmode);
6835 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6837 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6839 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6841 start_sequence ();
6842 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6843 insns = get_insns ();
6844 end_sequence ();
6846 emit_libcall_block (insns, dest, rax, x);
6848 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6849 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6850 else
6851 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6853 if (TARGET_GNU2_TLS)
6855 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6857 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6859 break;
6861 case TLS_MODEL_LOCAL_DYNAMIC:
6862 base = gen_reg_rtx (Pmode);
6863 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6865 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6867 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6869 start_sequence ();
6870 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6871 insns = get_insns ();
6872 end_sequence ();
6874 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6875 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6876 emit_libcall_block (insns, base, rax, note);
6878 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6879 emit_insn (gen_tls_local_dynamic_base_64 (base));
6880 else
6881 emit_insn (gen_tls_local_dynamic_base_32 (base));
6883 if (TARGET_GNU2_TLS)
6885 rtx x = ix86_tls_module_base ();
6887 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6888 gen_rtx_MINUS (Pmode, x, tp));
6891 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6892 off = gen_rtx_CONST (Pmode, off);
6894 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6896 if (TARGET_GNU2_TLS)
6898 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6900 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6903 break;
6905 case TLS_MODEL_INITIAL_EXEC:
6906 if (TARGET_64BIT)
6908 pic = NULL;
6909 type = UNSPEC_GOTNTPOFF;
6911 else if (flag_pic)
6913 if (reload_in_progress)
6914 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6915 pic = pic_offset_table_rtx;
6916 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6918 else if (!TARGET_ANY_GNU_TLS)
6920 pic = gen_reg_rtx (Pmode);
6921 emit_insn (gen_set_got (pic));
6922 type = UNSPEC_GOTTPOFF;
6924 else
6926 pic = NULL;
6927 type = UNSPEC_INDNTPOFF;
6930 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6931 off = gen_rtx_CONST (Pmode, off);
6932 if (pic)
6933 off = gen_rtx_PLUS (Pmode, pic, off);
6934 off = gen_const_mem (Pmode, off);
6935 set_mem_alias_set (off, ix86_GOT_alias_set ());
6937 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6939 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6940 off = force_reg (Pmode, off);
6941 return gen_rtx_PLUS (Pmode, base, off);
6943 else
6945 base = get_thread_pointer (true);
6946 dest = gen_reg_rtx (Pmode);
6947 emit_insn (gen_subsi3 (dest, base, off));
6949 break;
6951 case TLS_MODEL_LOCAL_EXEC:
6952 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6953 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6954 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6955 off = gen_rtx_CONST (Pmode, off);
6957 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6959 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6960 return gen_rtx_PLUS (Pmode, base, off);
6962 else
6964 base = get_thread_pointer (true);
6965 dest = gen_reg_rtx (Pmode);
6966 emit_insn (gen_subsi3 (dest, base, off));
6968 break;
6970 default:
6971 gcc_unreachable ();
6974 return dest;
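/* A hedged sketch of the resulting accesses (32-bit GNU TLS assumed; the
   exact sequence depends on the model and on TARGET_GNU2_TLS):

     local-exec uses a constant offset from the thread pointer
       (roughly %gs:x@NTPOFF),
     initial-exec loads the offset from the GOT first
       (x@GOTTPOFF or x@INDNTPOFF),
     and the dynamic models call the ___tls_get_addr helper and add the
       DTPOFF displacement to its result.

   The RTL returned here is only the address; the caller decides whether
   it ends up in a memory operand or in a register.  */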
6977 /* Try machine-dependent ways of modifying an illegitimate address
6978 to be legitimate. If we find one, return the new, valid address.
6979 This macro is used in only one place: `memory_address' in explow.c.
6981 OLDX is the address as it was before break_out_memory_refs was called.
6982 In some cases it is useful to look at this to decide what needs to be done.
6984 MODE and WIN are passed so that this macro can use
6985 GO_IF_LEGITIMATE_ADDRESS.
6987 It is always safe for this macro to do nothing. It exists to recognize
6988 opportunities to optimize the output.
6990 For the 80386, we handle X+REG by loading X into a register R and
6991 using R+REG. R will go in a general reg and indexing will be used.
6992 However, if REG is a broken-out memory address or multiplication,
6993 nothing needs to be done because REG can certainly go in a general reg.
6995 When -fpic is used, special handling is needed for symbolic references.
6996 See comments by legitimize_pic_address in i386.c for details. */
6999 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7001 int changed = 0;
7002 unsigned log;
7004 if (TARGET_DEBUG_ADDR)
7006 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7007 GET_MODE_NAME (mode));
7008 debug_rtx (x);
7011 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7012 if (log)
7013 return legitimize_tls_address (x, log, false);
7014 if (GET_CODE (x) == CONST
7015 && GET_CODE (XEXP (x, 0)) == PLUS
7016 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7017 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7019 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7020 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7023 if (flag_pic && SYMBOLIC_CONST (x))
7024 return legitimize_pic_address (x, 0);
7026 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7027 if (GET_CODE (x) == ASHIFT
7028 && GET_CODE (XEXP (x, 1)) == CONST_INT
7029 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7031 changed = 1;
7032 log = INTVAL (XEXP (x, 1));
7033 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7034 GEN_INT (1 << log));
7037 if (GET_CODE (x) == PLUS)
7039 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7041 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7042 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7043 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7045 changed = 1;
7046 log = INTVAL (XEXP (XEXP (x, 0), 1));
7047 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7048 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7049 GEN_INT (1 << log));
7052 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7053 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7054 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7056 changed = 1;
7057 log = INTVAL (XEXP (XEXP (x, 1), 1));
7058 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7059 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7060 GEN_INT (1 << log));
7063 /* Put multiply first if it isn't already. */
7064 if (GET_CODE (XEXP (x, 1)) == MULT)
7066 rtx tmp = XEXP (x, 0);
7067 XEXP (x, 0) = XEXP (x, 1);
7068 XEXP (x, 1) = tmp;
7069 changed = 1;
7072 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7073 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7074 created by virtual register instantiation, register elimination, and
7075 similar optimizations. */
7076 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7078 changed = 1;
7079 x = gen_rtx_PLUS (Pmode,
7080 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7081 XEXP (XEXP (x, 1), 0)),
7082 XEXP (XEXP (x, 1), 1));
7085 /* Canonicalize
7086 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7087 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7088 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7089 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7090 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7091 && CONSTANT_P (XEXP (x, 1)))
7093 rtx constant;
7094 rtx other = NULL_RTX;
7096 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7098 constant = XEXP (x, 1);
7099 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7101 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7103 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7104 other = XEXP (x, 1);
7106 else
7107 constant = 0;
7109 if (constant)
7111 changed = 1;
7112 x = gen_rtx_PLUS (Pmode,
7113 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7114 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7115 plus_constant (other, INTVAL (constant)));
7119 if (changed && legitimate_address_p (mode, x, FALSE))
7120 return x;
7122 if (GET_CODE (XEXP (x, 0)) == MULT)
7124 changed = 1;
7125 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7128 if (GET_CODE (XEXP (x, 1)) == MULT)
7130 changed = 1;
7131 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7134 if (changed
7135 && GET_CODE (XEXP (x, 1)) == REG
7136 && GET_CODE (XEXP (x, 0)) == REG)
7137 return x;
7139 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7141 changed = 1;
7142 x = legitimize_pic_address (x, 0);
7145 if (changed && legitimate_address_p (mode, x, FALSE))
7146 return x;
7148 if (GET_CODE (XEXP (x, 0)) == REG)
7150 rtx temp = gen_reg_rtx (Pmode);
7151 rtx val = force_operand (XEXP (x, 1), temp);
7152 if (val != temp)
7153 emit_move_insn (temp, val);
7155 XEXP (x, 1) = temp;
7156 return x;
7159 else if (GET_CODE (XEXP (x, 1)) == REG)
7161 rtx temp = gen_reg_rtx (Pmode);
7162 rtx val = force_operand (XEXP (x, 0), temp);
7163 if (val != temp)
7164 emit_move_insn (temp, val);
7166 XEXP (x, 0) = temp;
7167 return x;
7171 return x;
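/* Worked example (illustrative only): given
   x = (plus (ashift (reg A) (const_int 2)) (reg B)), the code above
   rewrites the shift into (mult (reg A) (const_int 4)) and, if needed,
   swaps operands so the multiplication comes first, yielding the
   canonical (plus (mult (reg A) (const_int 4)) (reg B)), which
   GO_IF_LEGITIMATE_ADDRESS accepts as (%B,%A,4).  */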
7174 /* Print an integer constant expression in assembler syntax. Addition
7175 and subtraction are the only arithmetic that may appear in these
7176 expressions. FILE is the stdio stream to write to, X is the rtx, and
7177 CODE is the operand print code from the output string. */
7179 static void
7180 output_pic_addr_const (FILE *file, rtx x, int code)
7182 char buf[256];
7184 switch (GET_CODE (x))
7186 case PC:
7187 gcc_assert (flag_pic);
7188 putc ('.', file);
7189 break;
7191 case SYMBOL_REF:
7192 output_addr_const (file, x);
7193 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7194 fputs ("@PLT", file);
7195 break;
7197 case LABEL_REF:
7198 x = XEXP (x, 0);
7199 /* FALLTHRU */
7200 case CODE_LABEL:
7201 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7202 assemble_name (asm_out_file, buf);
7203 break;
7205 case CONST_INT:
7206 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7207 break;
7209 case CONST:
7210 /* This used to output parentheses around the expression,
7211 but that does not work on the 386 (either ATT or BSD assembler). */
7212 output_pic_addr_const (file, XEXP (x, 0), code);
7213 break;
7215 case CONST_DOUBLE:
7216 if (GET_MODE (x) == VOIDmode)
7218 /* We can use %d if the number is <32 bits and positive. */
7219 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7220 fprintf (file, "0x%lx%08lx",
7221 (unsigned long) CONST_DOUBLE_HIGH (x),
7222 (unsigned long) CONST_DOUBLE_LOW (x));
7223 else
7224 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7226 else
7227 /* We can't handle floating point constants;
7228 PRINT_OPERAND must handle them. */
7229 output_operand_lossage ("floating constant misused");
7230 break;
7232 case PLUS:
7233 /* Some assemblers need integer constants to appear first. */
7234 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7236 output_pic_addr_const (file, XEXP (x, 0), code);
7237 putc ('+', file);
7238 output_pic_addr_const (file, XEXP (x, 1), code);
7240 else
7242 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7243 output_pic_addr_const (file, XEXP (x, 1), code);
7244 putc ('+', file);
7245 output_pic_addr_const (file, XEXP (x, 0), code);
7247 break;
7249 case MINUS:
7250 if (!TARGET_MACHO)
7251 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7252 output_pic_addr_const (file, XEXP (x, 0), code);
7253 putc ('-', file);
7254 output_pic_addr_const (file, XEXP (x, 1), code);
7255 if (!TARGET_MACHO)
7256 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7257 break;
7259 case UNSPEC:
7260 gcc_assert (XVECLEN (x, 0) == 1);
7261 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7262 switch (XINT (x, 1))
7264 case UNSPEC_GOT:
7265 fputs ("@GOT", file);
7266 break;
7267 case UNSPEC_GOTOFF:
7268 fputs ("@GOTOFF", file);
7269 break;
7270 case UNSPEC_GOTPCREL:
7271 fputs ("@GOTPCREL(%rip)", file);
7272 break;
7273 case UNSPEC_GOTTPOFF:
7274 /* FIXME: This might be @TPOFF in Sun ld too. */
7275 fputs ("@GOTTPOFF", file);
7276 break;
7277 case UNSPEC_TPOFF:
7278 fputs ("@TPOFF", file);
7279 break;
7280 case UNSPEC_NTPOFF:
7281 if (TARGET_64BIT)
7282 fputs ("@TPOFF", file);
7283 else
7284 fputs ("@NTPOFF", file);
7285 break;
7286 case UNSPEC_DTPOFF:
7287 fputs ("@DTPOFF", file);
7288 break;
7289 case UNSPEC_GOTNTPOFF:
7290 if (TARGET_64BIT)
7291 fputs ("@GOTTPOFF(%rip)", file);
7292 else
7293 fputs ("@GOTNTPOFF", file);
7294 break;
7295 case UNSPEC_INDNTPOFF:
7296 fputs ("@INDNTPOFF", file);
7297 break;
7298 default:
7299 output_operand_lossage ("invalid UNSPEC as operand");
7300 break;
7302 break;
7304 default:
7305 output_operand_lossage ("invalid expression as operand");
7309 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7310 We need to emit DTP-relative relocations. */
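/* For a 4-byte entry this emits, roughly, ".long sym@DTPOFF" (the exact
   directive text depends on ASM_LONG); for an 8-byte entry a trailing
   ", 0" supplies the zero upper half. */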
7312 static void
7313 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7315 fputs (ASM_LONG, file);
7316 output_addr_const (file, x);
7317 fputs ("@DTPOFF", file);
7318 switch (size)
7320 case 4:
7321 break;
7322 case 8:
7323 fputs (", 0", file);
7324 break;
7325 default:
7326 gcc_unreachable ();
7330 /* In the name of slightly smaller debug output, and to cater to
7331 general assembler lossage, recognize PIC+GOTOFF and turn it back
7332 into a direct symbol reference.
7334 On Darwin, this is necessary to avoid a crash, because Darwin
7335 has a different PIC label for each routine but the DWARF debugging
7336 information is not associated with any particular routine, so it's
7337 necessary to remove references to the PIC label from RTL stored by
7338 the DWARF output code. */
7340 static rtx
7341 ix86_delegitimize_address (rtx orig_x)
7343 rtx x = orig_x;
7344 /* reg_addend is NULL or a multiple of some register. */
7345 rtx reg_addend = NULL_RTX;
7346 /* const_addend is NULL or a const_int. */
7347 rtx const_addend = NULL_RTX;
7348 /* This is the result, or NULL. */
7349 rtx result = NULL_RTX;
7351 if (GET_CODE (x) == MEM)
7352 x = XEXP (x, 0);
7354 if (TARGET_64BIT)
7356 if (GET_CODE (x) != CONST
7357 || GET_CODE (XEXP (x, 0)) != UNSPEC
7358 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7359 || GET_CODE (orig_x) != MEM)
7360 return orig_x;
7361 return XVECEXP (XEXP (x, 0), 0, 0);
7364 if (GET_CODE (x) != PLUS
7365 || GET_CODE (XEXP (x, 1)) != CONST)
7366 return orig_x;
7368 if (GET_CODE (XEXP (x, 0)) == REG
7369 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7370 /* %ebx + GOT/GOTOFF */
7372 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7374 /* %ebx + %reg * scale + GOT/GOTOFF */
7375 reg_addend = XEXP (x, 0);
7376 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7377 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7378 reg_addend = XEXP (reg_addend, 1);
7379 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7380 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7381 reg_addend = XEXP (reg_addend, 0);
7382 else
7383 return orig_x;
7384 if (GET_CODE (reg_addend) != REG
7385 && GET_CODE (reg_addend) != MULT
7386 && GET_CODE (reg_addend) != ASHIFT)
7387 return orig_x;
7389 else
7390 return orig_x;
7392 x = XEXP (XEXP (x, 1), 0);
7393 if (GET_CODE (x) == PLUS
7394 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7396 const_addend = XEXP (x, 1);
7397 x = XEXP (x, 0);
7400 if (GET_CODE (x) == UNSPEC
7401 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7402 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7403 result = XVECEXP (x, 0, 0);
7405 if (TARGET_MACHO && darwin_local_data_pic (x)
7406 && GET_CODE (orig_x) != MEM)
7407 result = XEXP (x, 0);
7409 if (! result)
7410 return orig_x;
7412 if (const_addend)
7413 result = gen_rtx_PLUS (Pmode, result, const_addend);
7414 if (reg_addend)
7415 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7416 return result;
7419 static void
7420 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7421 int fp, FILE *file)
7423 const char *suffix;
7425 if (mode == CCFPmode || mode == CCFPUmode)
7427 enum rtx_code second_code, bypass_code;
7428 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7429 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7430 code = ix86_fp_compare_code_to_integer (code);
7431 mode = CCmode;
7433 if (reverse)
7434 code = reverse_condition (code);
7436 switch (code)
7438 case EQ:
7439 suffix = "e";
7440 break;
7441 case NE:
7442 suffix = "ne";
7443 break;
7444 case GT:
7445 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7446 suffix = "g";
7447 break;
7448 case GTU:
7449 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7450 Those same assemblers have the same but opposite lossage on cmov. */
7451 gcc_assert (mode == CCmode);
7452 suffix = fp ? "nbe" : "a";
7453 break;
7454 case LT:
7455 switch (mode)
7457 case CCNOmode:
7458 case CCGOCmode:
7459 suffix = "s";
7460 break;
7462 case CCmode:
7463 case CCGCmode:
7464 suffix = "l";
7465 break;
7467 default:
7468 gcc_unreachable ();
7470 break;
7471 case LTU:
7472 gcc_assert (mode == CCmode);
7473 suffix = "b";
7474 break;
7475 case GE:
7476 switch (mode)
7478 case CCNOmode:
7479 case CCGOCmode:
7480 suffix = "ns";
7481 break;
7483 case CCmode:
7484 case CCGCmode:
7485 suffix = "ge";
7486 break;
7488 default:
7489 gcc_unreachable ();
7491 break;
7492 case GEU:
7493 /* ??? As above. */
7494 gcc_assert (mode == CCmode);
7495 suffix = fp ? "nb" : "ae";
7496 break;
7497 case LE:
7498 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7499 suffix = "le";
7500 break;
7501 case LEU:
7502 gcc_assert (mode == CCmode);
7503 suffix = "be";
7504 break;
7505 case UNORDERED:
7506 suffix = fp ? "u" : "p";
7507 break;
7508 case ORDERED:
7509 suffix = fp ? "nu" : "np";
7510 break;
7511 default:
7512 gcc_unreachable ();
7514 fputs (suffix, file);
7517 /* Print the name of register X to FILE based on its machine mode and number.
7518 If CODE is 'w', pretend the mode is HImode.
7519 If CODE is 'b', pretend the mode is QImode.
7520 If CODE is 'k', pretend the mode is SImode.
7521 If CODE is 'q', pretend the mode is DImode.
7522 If CODE is 'h', pretend the reg is the 'high' byte register.
7523 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
7525 void
7526 print_reg (rtx x, int code, FILE *file)
7528 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7529 && REGNO (x) != FRAME_POINTER_REGNUM
7530 && REGNO (x) != FLAGS_REG
7531 && REGNO (x) != FPSR_REG
7532 && REGNO (x) != FPCR_REG);
7534 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7535 putc ('%', file);
7537 if (code == 'w' || MMX_REG_P (x))
7538 code = 2;
7539 else if (code == 'b')
7540 code = 1;
7541 else if (code == 'k')
7542 code = 4;
7543 else if (code == 'q')
7544 code = 8;
7545 else if (code == 'y')
7546 code = 3;
7547 else if (code == 'h')
7548 code = 0;
7549 else
7550 code = GET_MODE_SIZE (GET_MODE (x));
7552 /* Irritatingly, AMD extended registers use a different naming convention
7553 from the normal registers. */
7554 if (REX_INT_REG_P (x))
7556 gcc_assert (TARGET_64BIT);
7557 switch (code)
7559 case 0:
7560 error ("extended registers have no high halves");
7561 break;
7562 case 1:
7563 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7564 break;
7565 case 2:
7566 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7567 break;
7568 case 4:
7569 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7570 break;
7571 case 8:
7572 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7573 break;
7574 default:
7575 error ("unsupported operand size for extended register");
7576 break;
7578 return;
7580 switch (code)
7582 case 3:
7583 if (STACK_TOP_P (x))
7585 fputs ("st(0)", file);
7586 break;
7588 /* FALLTHRU */
7589 case 8:
7590 case 4:
7591 case 12:
7592 if (! ANY_FP_REG_P (x))
7593 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7594 /* FALLTHRU */
7595 case 16:
7596 case 2:
7597 normal:
7598 fputs (hi_reg_name[REGNO (x)], file);
7599 break;
7600 case 1:
7601 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7602 goto normal;
7603 fputs (qi_reg_name[REGNO (x)], file);
7604 break;
7605 case 0:
7606 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7607 goto normal;
7608 fputs (qi_high_reg_name[REGNO (x)], file);
7609 break;
7610 default:
7611 gcc_unreachable ();
7615 /* Locate some local-dynamic symbol still in use by this function
7616 so that we can print its name in some tls_local_dynamic_base
7617 pattern. */
7619 static const char *
7620 get_some_local_dynamic_name (void)
7622 rtx insn;
7624 if (cfun->machine->some_ld_name)
7625 return cfun->machine->some_ld_name;
7627 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7628 if (INSN_P (insn)
7629 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7630 return cfun->machine->some_ld_name;
7632 gcc_unreachable ();
7635 static int
7636 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7638 rtx x = *px;
7640 if (GET_CODE (x) == SYMBOL_REF
7641 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7643 cfun->machine->some_ld_name = XSTR (x, 0);
7644 return 1;
7647 return 0;
7650 /* Meaning of CODE:
7651 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7652 C -- print opcode suffix for set/cmov insn.
7653 c -- like C, but print reversed condition
7654 F,f -- likewise, but for floating-point.
7655 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7656 otherwise nothing
7657 R -- print the prefix for register names.
7658 z -- print the opcode suffix for the size of the current operand.
7659 * -- print a star (in certain assembler syntax)
7660 A -- print an absolute memory reference.
7661 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7662 s -- print a shift double count, followed by the assembler's argument
7663 delimiter.
7664 b -- print the QImode name of the register for the indicated operand.
7665 %b0 would print %al if operands[0] is reg 0.
7666 w -- likewise, print the HImode name of the register.
7667 k -- likewise, print the SImode name of the register.
7668 q -- likewise, print the DImode name of the register.
7669 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7670 y -- print "st(0)" instead of "st" as a register.
7671 D -- print condition for SSE cmp instruction.
7672 P -- if PIC, print an @PLT suffix.
7673 X -- don't print any sort of PIC '@' suffix for a symbol.
7674 & -- print some in-use local-dynamic symbol name.
7675 H -- print a memory address offset by 8; used for sse high-parts
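   For example, if operands[0] is hard register 0 (the ax register), then
   %b0, %w0, %k0, %q0 and %h0 print %al, %ax, %eax, %rax and %ah
   respectively in AT&T syntax.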
7678 void
7679 print_operand (FILE *file, rtx x, int code)
7681 if (code)
7683 switch (code)
7685 case '*':
7686 if (ASSEMBLER_DIALECT == ASM_ATT)
7687 putc ('*', file);
7688 return;
7690 case '&':
7691 assemble_name (file, get_some_local_dynamic_name ());
7692 return;
7694 case 'A':
7695 switch (ASSEMBLER_DIALECT)
7697 case ASM_ATT:
7698 putc ('*', file);
7699 break;
7701 case ASM_INTEL:
7702 /* Intel syntax. For absolute addresses, registers should not
7703 be surrounded by braces. */
7704 if (GET_CODE (x) != REG)
7706 putc ('[', file);
7707 PRINT_OPERAND (file, x, 0);
7708 putc (']', file);
7709 return;
7711 break;
7713 default:
7714 gcc_unreachable ();
7717 PRINT_OPERAND (file, x, 0);
7718 return;
7721 case 'L':
7722 if (ASSEMBLER_DIALECT == ASM_ATT)
7723 putc ('l', file);
7724 return;
7726 case 'W':
7727 if (ASSEMBLER_DIALECT == ASM_ATT)
7728 putc ('w', file);
7729 return;
7731 case 'B':
7732 if (ASSEMBLER_DIALECT == ASM_ATT)
7733 putc ('b', file);
7734 return;
7736 case 'Q':
7737 if (ASSEMBLER_DIALECT == ASM_ATT)
7738 putc ('l', file);
7739 return;
7741 case 'S':
7742 if (ASSEMBLER_DIALECT == ASM_ATT)
7743 putc ('s', file);
7744 return;
7746 case 'T':
7747 if (ASSEMBLER_DIALECT == ASM_ATT)
7748 putc ('t', file);
7749 return;
7751 case 'z':
7752 /* 387 opcodes don't get size suffixes if the operands are
7753 registers. */
7754 if (STACK_REG_P (x))
7755 return;
7757 /* Likewise if using Intel opcodes. */
7758 if (ASSEMBLER_DIALECT == ASM_INTEL)
7759 return;
7761 /* This is the size of op from size of operand. */
7762 switch (GET_MODE_SIZE (GET_MODE (x)))
7764 case 2:
7765 #ifdef HAVE_GAS_FILDS_FISTS
7766 putc ('s', file);
7767 #endif
7768 return;
7770 case 4:
7771 if (GET_MODE (x) == SFmode)
7773 putc ('s', file);
7774 return;
7776 else
7777 putc ('l', file);
7778 return;
7780 case 12:
7781 case 16:
7782 putc ('t', file);
7783 return;
7785 case 8:
7786 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7788 #ifdef GAS_MNEMONICS
7789 putc ('q', file);
7790 #else
7791 putc ('l', file);
7792 putc ('l', file);
7793 #endif
7795 else
7796 putc ('l', file);
7797 return;
7799 default:
7800 gcc_unreachable ();
7803 case 'b':
7804 case 'w':
7805 case 'k':
7806 case 'q':
7807 case 'h':
7808 case 'y':
7809 case 'X':
7810 case 'P':
7811 break;
7813 case 's':
7814 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7816 PRINT_OPERAND (file, x, 0);
7817 putc (',', file);
7819 return;
7821 case 'D':
7822 /* Little bit of braindamage here. The SSE compare instructions
7823 use completely different names for the comparisons than the
7824 fp conditional moves do. */
7825 switch (GET_CODE (x))
7827 case EQ:
7828 case UNEQ:
7829 fputs ("eq", file);
7830 break;
7831 case LT:
7832 case UNLT:
7833 fputs ("lt", file);
7834 break;
7835 case LE:
7836 case UNLE:
7837 fputs ("le", file);
7838 break;
7839 case UNORDERED:
7840 fputs ("unord", file);
7841 break;
7842 case NE:
7843 case LTGT:
7844 fputs ("neq", file);
7845 break;
7846 case UNGE:
7847 case GE:
7848 fputs ("nlt", file);
7849 break;
7850 case UNGT:
7851 case GT:
7852 fputs ("nle", file);
7853 break;
7854 case ORDERED:
7855 fputs ("ord", file);
7856 break;
7857 default:
7858 gcc_unreachable ();
7860 return;
7861 case 'O':
7862 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7863 if (ASSEMBLER_DIALECT == ASM_ATT)
7865 switch (GET_MODE (x))
7867 case HImode: putc ('w', file); break;
7868 case SImode:
7869 case SFmode: putc ('l', file); break;
7870 case DImode:
7871 case DFmode: putc ('q', file); break;
7872 default: gcc_unreachable ();
7874 putc ('.', file);
7876 #endif
7877 return;
7878 case 'C':
7879 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7880 return;
7881 case 'F':
7882 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7883 if (ASSEMBLER_DIALECT == ASM_ATT)
7884 putc ('.', file);
7885 #endif
7886 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7887 return;
7889 /* Like above, but reverse condition */
7890 case 'c':
7891 /* Check to see if argument to %c is really a constant
7892 and not a condition code which needs to be reversed. */
7893 if (!COMPARISON_P (x))
7895 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7896 return;
7898 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7899 return;
7900 case 'f':
7901 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7902 if (ASSEMBLER_DIALECT == ASM_ATT)
7903 putc ('.', file);
7904 #endif
7905 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7906 return;
7908 case 'H':
7909 /* It doesn't actually matter what mode we use here, as we're
7910 only going to use this for printing. */
7911 x = adjust_address_nv (x, DImode, 8);
7912 break;
7914 case '+':
7916 rtx x;
7918 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7919 return;
7921 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7922 if (x)
7924 int pred_val = INTVAL (XEXP (x, 0));
7926 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7927 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7929 int taken = pred_val > REG_BR_PROB_BASE / 2;
7930 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7932 /* Emit hints only when the default branch prediction
7933 heuristics would fail. */
7934 if (taken != cputaken)
7936 /* We use 3e (DS) prefix for taken branches and
7937 2e (CS) prefix for not taken branches. */
7938 if (taken)
7939 fputs ("ds ; ", file);
7940 else
7941 fputs ("cs ; ", file);
7945 return;
7947 default:
7948 output_operand_lossage ("invalid operand code '%c'", code);
7952 if (GET_CODE (x) == REG)
7953 print_reg (x, code, file);
7955 else if (GET_CODE (x) == MEM)
7957 /* No `byte ptr' prefix for call instructions. */
7958 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7960 const char * size;
7961 switch (GET_MODE_SIZE (GET_MODE (x)))
7963 case 1: size = "BYTE"; break;
7964 case 2: size = "WORD"; break;
7965 case 4: size = "DWORD"; break;
7966 case 8: size = "QWORD"; break;
7967 case 12: size = "XWORD"; break;
7968 case 16: size = "XMMWORD"; break;
7969 default:
7970 gcc_unreachable ();
7973 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7974 if (code == 'b')
7975 size = "BYTE";
7976 else if (code == 'w')
7977 size = "WORD";
7978 else if (code == 'k')
7979 size = "DWORD";
7981 fputs (size, file);
7982 fputs (" PTR ", file);
7985 x = XEXP (x, 0);
7986 /* Avoid (%rip) for call operands. */
7987 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7988 && GET_CODE (x) != CONST_INT)
7989 output_addr_const (file, x);
7990 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7991 output_operand_lossage ("invalid constraints for operand");
7992 else
7993 output_address (x);
7996 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7998 REAL_VALUE_TYPE r;
7999 long l;
8001 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8002 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8004 if (ASSEMBLER_DIALECT == ASM_ATT)
8005 putc ('$', file);
8006 fprintf (file, "0x%08lx", l);
8009 /* These float cases don't actually occur as immediate operands. */
8010 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8012 char dstr[30];
8014 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8015 fprintf (file, "%s", dstr);
8018 else if (GET_CODE (x) == CONST_DOUBLE
8019 && GET_MODE (x) == XFmode)
8021 char dstr[30];
8023 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8024 fprintf (file, "%s", dstr);
8027 else
8029 /* We have patterns that allow zero sets of memory, for instance.
8030 In 64-bit mode, we should probably support all 8-byte vectors,
8031 since we can in fact encode that into an immediate. */
8032 if (GET_CODE (x) == CONST_VECTOR)
8034 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8035 x = const0_rtx;
8038 if (code != 'P')
8040 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8042 if (ASSEMBLER_DIALECT == ASM_ATT)
8043 putc ('$', file);
8045 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8046 || GET_CODE (x) == LABEL_REF)
8048 if (ASSEMBLER_DIALECT == ASM_ATT)
8049 putc ('$', file);
8050 else
8051 fputs ("OFFSET FLAT:", file);
8054 if (GET_CODE (x) == CONST_INT)
8055 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8056 else if (flag_pic)
8057 output_pic_addr_const (file, x, code);
8058 else
8059 output_addr_const (file, x);
8063 /* Print a memory operand whose address is ADDR. */
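/* For example, with base %ebx, index %esi, scale 4 and displacement 12,
   the AT&T output is "12(%ebx,%esi,4)", while Intel syntax places the
   same pieces inside brackets, roughly "[ebx+12+esi*4]". */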
8065 void
8066 print_operand_address (FILE *file, rtx addr)
8068 struct ix86_address parts;
8069 rtx base, index, disp;
8070 int scale;
8071 int ok = ix86_decompose_address (addr, &parts);
8073 gcc_assert (ok);
8075 base = parts.base;
8076 index = parts.index;
8077 disp = parts.disp;
8078 scale = parts.scale;
8080 switch (parts.seg)
8082 case SEG_DEFAULT:
8083 break;
8084 case SEG_FS:
8085 case SEG_GS:
8086 if (USER_LABEL_PREFIX[0] == 0)
8087 putc ('%', file);
8088 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8089 break;
8090 default:
8091 gcc_unreachable ();
8094 if (!base && !index)
8096 /* A displacement-only address requires special attention. */
8098 if (GET_CODE (disp) == CONST_INT)
8100 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8102 if (USER_LABEL_PREFIX[0] == 0)
8103 putc ('%', file);
8104 fputs ("ds:", file);
8106 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8108 else if (flag_pic)
8109 output_pic_addr_const (file, disp, 0);
8110 else
8111 output_addr_const (file, disp);
8113 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8114 if (TARGET_64BIT)
8116 if (GET_CODE (disp) == CONST
8117 && GET_CODE (XEXP (disp, 0)) == PLUS
8118 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8119 disp = XEXP (XEXP (disp, 0), 0);
8120 if (GET_CODE (disp) == LABEL_REF
8121 || (GET_CODE (disp) == SYMBOL_REF
8122 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8123 fputs ("(%rip)", file);
8126 else
8128 if (ASSEMBLER_DIALECT == ASM_ATT)
8130 if (disp)
8132 if (flag_pic)
8133 output_pic_addr_const (file, disp, 0);
8134 else if (GET_CODE (disp) == LABEL_REF)
8135 output_asm_label (disp);
8136 else
8137 output_addr_const (file, disp);
8140 putc ('(', file);
8141 if (base)
8142 print_reg (base, 0, file);
8143 if (index)
8145 putc (',', file);
8146 print_reg (index, 0, file);
8147 if (scale != 1)
8148 fprintf (file, ",%d", scale);
8150 putc (')', file);
8152 else
8154 rtx offset = NULL_RTX;
8156 if (disp)
8158 /* Pull out the offset of a symbol; print any symbol itself. */
8159 if (GET_CODE (disp) == CONST
8160 && GET_CODE (XEXP (disp, 0)) == PLUS
8161 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8163 offset = XEXP (XEXP (disp, 0), 1);
8164 disp = gen_rtx_CONST (VOIDmode,
8165 XEXP (XEXP (disp, 0), 0));
8168 if (flag_pic)
8169 output_pic_addr_const (file, disp, 0);
8170 else if (GET_CODE (disp) == LABEL_REF)
8171 output_asm_label (disp);
8172 else if (GET_CODE (disp) == CONST_INT)
8173 offset = disp;
8174 else
8175 output_addr_const (file, disp);
8178 putc ('[', file);
8179 if (base)
8181 print_reg (base, 0, file);
8182 if (offset)
8184 if (INTVAL (offset) >= 0)
8185 putc ('+', file);
8186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8189 else if (offset)
8190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8191 else
8192 putc ('0', file);
8194 if (index)
8196 putc ('+', file);
8197 print_reg (index, 0, file);
8198 if (scale != 1)
8199 fprintf (file, "*%d", scale);
8201 putc (']', file);
8206 bool
8207 output_addr_const_extra (FILE *file, rtx x)
8209 rtx op;
8211 if (GET_CODE (x) != UNSPEC)
8212 return false;
8214 op = XVECEXP (x, 0, 0);
8215 switch (XINT (x, 1))
8217 case UNSPEC_GOTTPOFF:
8218 output_addr_const (file, op);
8219 /* FIXME: This might be @TPOFF in Sun ld. */
8220 fputs ("@GOTTPOFF", file);
8221 break;
8222 case UNSPEC_TPOFF:
8223 output_addr_const (file, op);
8224 fputs ("@TPOFF", file);
8225 break;
8226 case UNSPEC_NTPOFF:
8227 output_addr_const (file, op);
8228 if (TARGET_64BIT)
8229 fputs ("@TPOFF", file);
8230 else
8231 fputs ("@NTPOFF", file);
8232 break;
8233 case UNSPEC_DTPOFF:
8234 output_addr_const (file, op);
8235 fputs ("@DTPOFF", file);
8236 break;
8237 case UNSPEC_GOTNTPOFF:
8238 output_addr_const (file, op);
8239 if (TARGET_64BIT)
8240 fputs ("@GOTTPOFF(%rip)", file);
8241 else
8242 fputs ("@GOTNTPOFF", file);
8243 break;
8244 case UNSPEC_INDNTPOFF:
8245 output_addr_const (file, op);
8246 fputs ("@INDNTPOFF", file);
8247 break;
8249 default:
8250 return false;
8253 return true;
8256 /* Split one or more DImode RTL references into pairs of SImode
8257 references. The RTL can be REG, offsettable MEM, integer constant, or
8258 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8259 split and "num" is its length. lo_half and hi_half are output arrays
8260 that parallel "operands". */
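/* For example, a DImode constant 0x0000000500000007 splits into
   lo_half (const_int 7) and hi_half (const_int 5), while a DImode MEM is
   split into SImode MEMs at byte offsets 0 and 4. */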
8262 void
8263 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8265 while (num--)
8267 rtx op = operands[num];
8269 /* simplify_subreg refuses to split volatile memory addresses,
8270 but we still have to handle them. */
8271 if (GET_CODE (op) == MEM)
8273 lo_half[num] = adjust_address (op, SImode, 0);
8274 hi_half[num] = adjust_address (op, SImode, 4);
8276 else
8278 lo_half[num] = simplify_gen_subreg (SImode, op,
8279 GET_MODE (op) == VOIDmode
8280 ? DImode : GET_MODE (op), 0);
8281 hi_half[num] = simplify_gen_subreg (SImode, op,
8282 GET_MODE (op) == VOIDmode
8283 ? DImode : GET_MODE (op), 4);
8287 /* Split one or more TImode RTL references into pairs of DImode
8288 references. The RTL can be REG, offsettable MEM, integer constant, or
8289 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8290 split and "num" is its length. lo_half and hi_half are output arrays
8291 that parallel "operands". */
8293 void
8294 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8296 while (num--)
8298 rtx op = operands[num];
8300 /* simplify_subreg refuses to split volatile memory addresses, but we
8301 still have to handle them. */
8302 if (GET_CODE (op) == MEM)
8304 lo_half[num] = adjust_address (op, DImode, 0);
8305 hi_half[num] = adjust_address (op, DImode, 8);
8307 else
8309 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8310 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8315 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8316 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8317 is the expression of the binary operation. The output may either be
8318 emitted here, or returned to the caller, like all output_* functions.
8320 There is no guarantee that the operands are the same mode, as they
8321 might be within FLOAT or FLOAT_EXTEND expressions. */
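/* For instance, an SFmode add with SSE operands comes out as
   "addss\t{%2, %0|%0, %2}", while an x87 register add with the result in
   st(0) (and no operand dying) comes out as "fadd\t{%y2, %0|%0, %y2}". */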
8323 #ifndef SYSV386_COMPAT
8324 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8325 wants to fix the assemblers because that causes incompatibility
8326 with gcc. No-one wants to fix gcc because that causes
8327 incompatibility with assemblers... You can use the option of
8328 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8329 #define SYSV386_COMPAT 1
8330 #endif
8332 const char *
8333 output_387_binary_op (rtx insn, rtx *operands)
8335 static char buf[30];
8336 const char *p;
8337 const char *ssep;
8338 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8340 #ifdef ENABLE_CHECKING
8341 /* Even if we do not want to check the inputs, this documents the input
8342 constraints, which helps in understanding the following code. */
8343 if (STACK_REG_P (operands[0])
8344 && ((REG_P (operands[1])
8345 && REGNO (operands[0]) == REGNO (operands[1])
8346 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8347 || (REG_P (operands[2])
8348 && REGNO (operands[0]) == REGNO (operands[2])
8349 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8350 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8351 ; /* ok */
8352 else
8353 gcc_assert (is_sse);
8354 #endif
8356 switch (GET_CODE (operands[3]))
8358 case PLUS:
8359 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8360 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8361 p = "fiadd";
8362 else
8363 p = "fadd";
8364 ssep = "add";
8365 break;
8367 case MINUS:
8368 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8369 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8370 p = "fisub";
8371 else
8372 p = "fsub";
8373 ssep = "sub";
8374 break;
8376 case MULT:
8377 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8378 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8379 p = "fimul";
8380 else
8381 p = "fmul";
8382 ssep = "mul";
8383 break;
8385 case DIV:
8386 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8387 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8388 p = "fidiv";
8389 else
8390 p = "fdiv";
8391 ssep = "div";
8392 break;
8394 default:
8395 gcc_unreachable ();
8398 if (is_sse)
8400 strcpy (buf, ssep);
8401 if (GET_MODE (operands[0]) == SFmode)
8402 strcat (buf, "ss\t{%2, %0|%0, %2}");
8403 else
8404 strcat (buf, "sd\t{%2, %0|%0, %2}");
8405 return buf;
8407 strcpy (buf, p);
8409 switch (GET_CODE (operands[3]))
8411 case MULT:
8412 case PLUS:
8413 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8415 rtx temp = operands[2];
8416 operands[2] = operands[1];
8417 operands[1] = temp;
8420 /* We know operands[0] == operands[1]. */
8422 if (GET_CODE (operands[2]) == MEM)
8424 p = "%z2\t%2";
8425 break;
8428 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8430 if (STACK_TOP_P (operands[0]))
8431 /* How is it that we are storing to a dead operand[2]?
8432 Well, presumably operands[1] is dead too. We can't
8433 store the result to st(0) as st(0) gets popped on this
8434 instruction. Instead store to operands[2] (which I
8435 think has to be st(1)). st(1) will be popped later.
8436 gcc <= 2.8.1 didn't have this check and generated
8437 assembly code that the Unixware assembler rejected. */
8438 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8439 else
8440 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8441 break;
8444 if (STACK_TOP_P (operands[0]))
8445 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8446 else
8447 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8448 break;
8450 case MINUS:
8451 case DIV:
8452 if (GET_CODE (operands[1]) == MEM)
8454 p = "r%z1\t%1";
8455 break;
8458 if (GET_CODE (operands[2]) == MEM)
8460 p = "%z2\t%2";
8461 break;
8464 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8466 #if SYSV386_COMPAT
8467 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8468 derived assemblers, confusingly reverse the direction of
8469 the operation for fsub{r} and fdiv{r} when the
8470 destination register is not st(0). The Intel assembler
8471 doesn't have this brain damage. Read !SYSV386_COMPAT to
8472 figure out what the hardware really does. */
8473 if (STACK_TOP_P (operands[0]))
8474 p = "{p\t%0, %2|rp\t%2, %0}";
8475 else
8476 p = "{rp\t%2, %0|p\t%0, %2}";
8477 #else
8478 if (STACK_TOP_P (operands[0]))
8479 /* As above for fmul/fadd, we can't store to st(0). */
8480 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8481 else
8482 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8483 #endif
8484 break;
8487 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8489 #if SYSV386_COMPAT
8490 if (STACK_TOP_P (operands[0]))
8491 p = "{rp\t%0, %1|p\t%1, %0}";
8492 else
8493 p = "{p\t%1, %0|rp\t%0, %1}";
8494 #else
8495 if (STACK_TOP_P (operands[0]))
8496 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8497 else
8498 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8499 #endif
8500 break;
8503 if (STACK_TOP_P (operands[0]))
8505 if (STACK_TOP_P (operands[1]))
8506 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8507 else
8508 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8509 break;
8511 else if (STACK_TOP_P (operands[1]))
8513 #if SYSV386_COMPAT
8514 p = "{\t%1, %0|r\t%0, %1}";
8515 #else
8516 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8517 #endif
8519 else
8521 #if SYSV386_COMPAT
8522 p = "{r\t%2, %0|\t%0, %2}";
8523 #else
8524 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8525 #endif
8527 break;
8529 default:
8530 gcc_unreachable ();
8533 strcat (buf, p);
8534 return buf;
8537 /* Return needed mode for entity in optimize_mode_switching pass. */
8540 ix86_mode_needed (int entity, rtx insn)
8542 enum attr_i387_cw mode;
8544 /* The mode UNINITIALIZED is used to store the control word after a
8545 function call or ASM pattern. The mode ANY specifies that the function
8546 has no requirements on the control word and makes no changes in the
8547 bits we are interested in. */
8549 if (CALL_P (insn)
8550 || (NONJUMP_INSN_P (insn)
8551 && (asm_noperands (PATTERN (insn)) >= 0
8552 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8553 return I387_CW_UNINITIALIZED;
8555 if (recog_memoized (insn) < 0)
8556 return I387_CW_ANY;
8558 mode = get_attr_i387_cw (insn);
8560 switch (entity)
8562 case I387_TRUNC:
8563 if (mode == I387_CW_TRUNC)
8564 return mode;
8565 break;
8567 case I387_FLOOR:
8568 if (mode == I387_CW_FLOOR)
8569 return mode;
8570 break;
8572 case I387_CEIL:
8573 if (mode == I387_CW_CEIL)
8574 return mode;
8575 break;
8577 case I387_MASK_PM:
8578 if (mode == I387_CW_MASK_PM)
8579 return mode;
8580 break;
8582 default:
8583 gcc_unreachable ();
8586 return I387_CW_ANY;
8589 /* Output code to initialize control word copies used by trunc?f?i and
8590 rounding patterns. CURRENT_MODE is set to the current control word,
8591 while NEW_MODE is set to the new control word. */
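/* The i387 rounding control field is bits 11:10 of the control word
   (mask 0x0c00): 00 = to nearest, 01 (0x0400) = down, 10 (0x0800) = up,
   11 (0x0c00) = toward zero; bit 5 (0x0020) masks the precision
   exception. The constants below set these bits in the saved word. */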
8593 void
8594 emit_i387_cw_initialization (int mode)
8596 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8597 rtx new_mode;
8599 int slot;
8601 rtx reg = gen_reg_rtx (HImode);
8603 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8604 emit_move_insn (reg, copy_rtx (stored_mode));
8606 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8608 switch (mode)
8610 case I387_CW_TRUNC:
8611 /* round toward zero (truncate) */
8612 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8613 slot = SLOT_CW_TRUNC;
8614 break;
8616 case I387_CW_FLOOR:
8617 /* round down toward -oo */
8618 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8619 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8620 slot = SLOT_CW_FLOOR;
8621 break;
8623 case I387_CW_CEIL:
8624 /* round up toward +oo */
8625 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8626 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8627 slot = SLOT_CW_CEIL;
8628 break;
8630 case I387_CW_MASK_PM:
8631 /* mask precision exception for nearbyint() */
8632 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8633 slot = SLOT_CW_MASK_PM;
8634 break;
8636 default:
8637 gcc_unreachable ();
8640 else
8642 switch (mode)
8644 case I387_CW_TRUNC:
8645 /* round toward zero (truncate) */
8646 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8647 slot = SLOT_CW_TRUNC;
8648 break;
8650 case I387_CW_FLOOR:
8651 /* round down toward -oo */
8652 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8653 slot = SLOT_CW_FLOOR;
8654 break;
8656 case I387_CW_CEIL:
8657 /* round up toward +oo */
8658 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8659 slot = SLOT_CW_CEIL;
8660 break;
8662 case I387_CW_MASK_PM:
8663 /* mask precision exception for nearbyint() */
8664 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8665 slot = SLOT_CW_MASK_PM;
8666 break;
8668 default:
8669 gcc_unreachable ();
8673 gcc_assert (slot < MAX_386_STACK_LOCALS);
8675 new_mode = assign_386_stack_local (HImode, slot);
8676 emit_move_insn (new_mode, reg);
8679 /* Output code for INSN to convert a float to a signed int. OPERANDS
8680 are the insn operands. The output may be [HSD]Imode and the input
8681 operand may be [SDX]Fmode. */
8683 const char *
8684 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8686 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8687 int dimode_p = GET_MODE (operands[0]) == DImode;
8688 int round_mode = get_attr_i387_cw (insn);
8690 /* Jump through a hoop or two for DImode, since the hardware has no
8691 non-popping instruction. We used to do this a different way, but
8692 that was somewhat fragile and broke with post-reload splitters. */
8693 if ((dimode_p || fisttp) && !stack_top_dies)
8694 output_asm_insn ("fld\t%y1", operands);
8696 gcc_assert (STACK_TOP_P (operands[1]));
8697 gcc_assert (GET_CODE (operands[0]) == MEM);
8699 if (fisttp)
8700 output_asm_insn ("fisttp%z0\t%0", operands);
8701 else
8703 if (round_mode != I387_CW_ANY)
8704 output_asm_insn ("fldcw\t%3", operands);
8705 if (stack_top_dies || dimode_p)
8706 output_asm_insn ("fistp%z0\t%0", operands);
8707 else
8708 output_asm_insn ("fist%z0\t%0", operands);
8709 if (round_mode != I387_CW_ANY)
8710 output_asm_insn ("fldcw\t%2", operands);
8713 return "";
8716 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8717 have the values zero or one, indicates the ffreep insn's operand
8718 from the OPERANDS array. */
8720 static const char *
8721 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8723 if (TARGET_USE_FFREEP)
8724 #if HAVE_AS_IX86_FFREEP
8725 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8726 #else
8728 static char retval[] = ".word\t0xc_df";
8729 int regno = REGNO (operands[opno]);
8731 gcc_assert (FP_REGNO_P (regno));
8733 retval[9] = '0' + (regno - FIRST_STACK_REG);
8734 return retval;
8736 #endif
8738 return opno ? "fstp\t%y1" : "fstp\t%y0";
8742 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8743 should be used. UNORDERED_P is true when fucom should be used. */
8745 const char *
8746 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8748 int stack_top_dies;
8749 rtx cmp_op0, cmp_op1;
8750 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8752 if (eflags_p)
8754 cmp_op0 = operands[0];
8755 cmp_op1 = operands[1];
8757 else
8759 cmp_op0 = operands[1];
8760 cmp_op1 = operands[2];
8763 if (is_sse)
8765 if (GET_MODE (operands[0]) == SFmode)
8766 if (unordered_p)
8767 return "ucomiss\t{%1, %0|%0, %1}";
8768 else
8769 return "comiss\t{%1, %0|%0, %1}";
8770 else
8771 if (unordered_p)
8772 return "ucomisd\t{%1, %0|%0, %1}";
8773 else
8774 return "comisd\t{%1, %0|%0, %1}";
8777 gcc_assert (STACK_TOP_P (cmp_op0));
8779 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8781 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8783 if (stack_top_dies)
8785 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8786 return output_387_ffreep (operands, 1);
8788 else
8789 return "ftst\n\tfnstsw\t%0";
8792 if (STACK_REG_P (cmp_op1)
8793 && stack_top_dies
8794 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8795 && REGNO (cmp_op1) != FIRST_STACK_REG)
8797 /* If the top of the 387 stack dies, and the other operand
8798 is also a stack register that dies, then this must be a
8799 `fcompp' float compare. */
8801 if (eflags_p)
8803 /* There is no double popping fcomi variant. Fortunately,
8804 eflags is immune from the fstp's cc clobbering. */
8805 if (unordered_p)
8806 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8807 else
8808 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8809 return output_387_ffreep (operands, 0);
8811 else
8813 if (unordered_p)
8814 return "fucompp\n\tfnstsw\t%0";
8815 else
8816 return "fcompp\n\tfnstsw\t%0";
8819 else
8821 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8823 static const char * const alt[16] =
8825 "fcom%z2\t%y2\n\tfnstsw\t%0",
8826 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8827 "fucom%z2\t%y2\n\tfnstsw\t%0",
8828 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8830 "ficom%z2\t%y2\n\tfnstsw\t%0",
8831 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8832 NULL,
8833 NULL,
8835 "fcomi\t{%y1, %0|%0, %y1}",
8836 "fcomip\t{%y1, %0|%0, %y1}",
8837 "fucomi\t{%y1, %0|%0, %y1}",
8838 "fucomip\t{%y1, %0|%0, %y1}",
8840 NULL,
8841 NULL,
8842 NULL,
8843 NULL
8846 int mask;
8847 const char *ret;
8849 mask = eflags_p << 3;
8850 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8851 mask |= unordered_p << 1;
8852 mask |= stack_top_dies;
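/* For example, an fcomi-style FP compare (eflags_p set) where the stack
   top dies gives mask = 8 + 1 = 9, selecting "fcomip\t{%y1, %0|%0, %y1}"
   from the table above. */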
8854 gcc_assert (mask < 16);
8855 ret = alt[mask];
8856 gcc_assert (ret);
8858 return ret;
8862 void
8863 ix86_output_addr_vec_elt (FILE *file, int value)
8865 const char *directive = ASM_LONG;
8867 #ifdef ASM_QUAD
8868 if (TARGET_64BIT)
8869 directive = ASM_QUAD;
8870 #else
8871 gcc_assert (!TARGET_64BIT);
8872 #endif
8874 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8877 void
8878 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8880 if (TARGET_64BIT)
8881 fprintf (file, "%s%s%d-%s%d\n",
8882 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8883 else if (HAVE_AS_GOTOFF_IN_DATA)
8884 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8885 #if TARGET_MACHO
8886 else if (TARGET_MACHO)
8888 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8889 machopic_output_function_base_name (file);
8890 fprintf(file, "\n");
8892 #endif
8893 else
8894 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8895 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8898 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8899 for the target. */
8901 void
8902 ix86_expand_clear (rtx dest)
8904 rtx tmp;
8906 /* We play register width games, which are only valid after reload. */
8907 gcc_assert (reload_completed);
8909 /* Avoid HImode and its attendant prefix byte. */
8910 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8911 dest = gen_rtx_REG (SImode, REGNO (dest));
8913 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8915 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8916 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8918 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8919 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8922 emit_insn (tmp);
8925 /* X is an unchanging MEM. If it is a constant pool reference, return
8926 the constant pool rtx, else NULL. */
8929 maybe_get_pool_constant (rtx x)
8931 x = ix86_delegitimize_address (XEXP (x, 0));
8933 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8934 return get_pool_constant (x);
8936 return NULL_RTX;
8939 void
8940 ix86_expand_move (enum machine_mode mode, rtx operands[])
8942 int strict = (reload_in_progress || reload_completed);
8943 rtx op0, op1;
8944 enum tls_model model;
8946 op0 = operands[0];
8947 op1 = operands[1];
8949 if (GET_CODE (op1) == SYMBOL_REF)
8951 model = SYMBOL_REF_TLS_MODEL (op1);
8952 if (model)
8954 op1 = legitimize_tls_address (op1, model, true);
8955 op1 = force_operand (op1, op0);
8956 if (op1 == op0)
8957 return;
8960 else if (GET_CODE (op1) == CONST
8961 && GET_CODE (XEXP (op1, 0)) == PLUS
8962 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8964 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8965 if (model)
8967 rtx addend = XEXP (XEXP (op1, 0), 1);
8968 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8969 op1 = force_operand (op1, NULL);
8970 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8971 op0, 1, OPTAB_DIRECT);
8972 if (op1 == op0)
8973 return;
8977 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8979 if (TARGET_MACHO && !TARGET_64BIT)
8981 #if TARGET_MACHO
8982 if (MACHOPIC_PURE)
8984 rtx temp = ((reload_in_progress
8985 || ((op0 && GET_CODE (op0) == REG)
8986 && mode == Pmode))
8987 ? op0 : gen_reg_rtx (Pmode));
8988 op1 = machopic_indirect_data_reference (op1, temp);
8989 op1 = machopic_legitimize_pic_address (op1, mode,
8990 temp == op1 ? 0 : temp);
8992 else if (MACHOPIC_INDIRECT)
8993 op1 = machopic_indirect_data_reference (op1, 0);
8994 if (op0 == op1)
8995 return;
8996 #endif
8998 else
9000 if (GET_CODE (op0) == MEM)
9001 op1 = force_reg (Pmode, op1);
9002 else
9003 op1 = legitimize_address (op1, op1, Pmode);
9006 else
9008 if (GET_CODE (op0) == MEM
9009 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9010 || !push_operand (op0, mode))
9011 && GET_CODE (op1) == MEM)
9012 op1 = force_reg (mode, op1);
9014 if (push_operand (op0, mode)
9015 && ! general_no_elim_operand (op1, mode))
9016 op1 = copy_to_mode_reg (mode, op1);
9018 /* Force large constants in 64bit compilation into registers
9019 to get them CSEed. */
9020 if (TARGET_64BIT && mode == DImode
9021 && immediate_operand (op1, mode)
9022 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9023 && !register_operand (op0, mode)
9024 && optimize && !reload_completed && !reload_in_progress)
9025 op1 = copy_to_mode_reg (mode, op1);
9027 if (FLOAT_MODE_P (mode))
9029 /* If we are loading a floating point constant to a register,
9030 force the value to memory now, since we'll get better code
9031 out the back end. */
9033 if (strict)
9035 else if (GET_CODE (op1) == CONST_DOUBLE)
9037 op1 = validize_mem (force_const_mem (mode, op1));
9038 if (!register_operand (op0, mode))
9040 rtx temp = gen_reg_rtx (mode);
9041 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9042 emit_move_insn (op0, temp);
9043 return;
9049 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9052 void
9053 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9055 rtx op0 = operands[0], op1 = operands[1];
9057 /* Force constants other than zero into memory. We do not know how
9058 the instructions used to build constants modify the upper 64 bits
9059 of the register; once we have that information we may be able
9060 to handle some of them more efficiently. */
9061 if ((reload_in_progress | reload_completed) == 0
9062 && register_operand (op0, mode)
9063 && CONSTANT_P (op1)
9064 && standard_sse_constant_p (op1) <= 0)
9065 op1 = validize_mem (force_const_mem (mode, op1));
9067 /* Make operand1 a register if it isn't already. */
9068 if (!no_new_pseudos
9069 && !register_operand (op0, mode)
9070 && !register_operand (op1, mode))
9072 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9073 return;
9076 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9079 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9080 straight to ix86_expand_vector_move. */
9082 void
9083 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9085 rtx op0, op1, m;
9087 op0 = operands[0];
9088 op1 = operands[1];
9090 if (MEM_P (op1))
9092 /* If we're optimizing for size, movups is the smallest. */
9093 if (optimize_size)
9095 op0 = gen_lowpart (V4SFmode, op0);
9096 op1 = gen_lowpart (V4SFmode, op1);
9097 emit_insn (gen_sse_movups (op0, op1));
9098 return;
9101 /* ??? If we have typed data, then it would appear that using
9102 movdqu is the only way to get unaligned data loaded with
9103 integer type. */
9104 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9106 op0 = gen_lowpart (V16QImode, op0);
9107 op1 = gen_lowpart (V16QImode, op1);
9108 emit_insn (gen_sse2_movdqu (op0, op1));
9109 return;
9112 if (TARGET_SSE2 && mode == V2DFmode)
9114 rtx zero;
9116 /* When SSE registers are split into halves, we can avoid
9117 writing to the top half twice. */
9118 if (TARGET_SSE_SPLIT_REGS)
9120 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9121 zero = op0;
9123 else
9125 /* ??? Not sure about the best option for the Intel chips.
9126 The following would seem to satisfy; the register is
9127 entirely cleared, breaking the dependency chain. We
9128 then store to the upper half, with a dependency depth
9129 of one. A rumor has it that Intel recommends two movsd
9130 followed by an unpacklpd, but this is unconfirmed. And
9131 given that the dependency depth of the unpacklpd would
9132 still be one, I'm not sure why this would be better. */
9133 zero = CONST0_RTX (V2DFmode);
9136 m = adjust_address (op1, DFmode, 0);
9137 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9138 m = adjust_address (op1, DFmode, 8);
9139 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9141 else
9143 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9144 emit_move_insn (op0, CONST0_RTX (mode));
9145 else
9146 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9148 if (mode != V4SFmode)
9149 op0 = gen_lowpart (V4SFmode, op0);
9150 m = adjust_address (op1, V2SFmode, 0);
9151 emit_insn (gen_sse_loadlps (op0, op0, m));
9152 m = adjust_address (op1, V2SFmode, 8);
9153 emit_insn (gen_sse_loadhps (op0, op0, m));
9156 else if (MEM_P (op0))
9158 /* If we're optimizing for size, movups is the smallest. */
9159 if (optimize_size)
9161 op0 = gen_lowpart (V4SFmode, op0);
9162 op1 = gen_lowpart (V4SFmode, op1);
9163 emit_insn (gen_sse_movups (op0, op1));
9164 return;
9167 /* ??? Similar to above, only less clear because of quote
9168 typeless stores unquote. */
9169 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9170 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9172 op0 = gen_lowpart (V16QImode, op0);
9173 op1 = gen_lowpart (V16QImode, op1);
9174 emit_insn (gen_sse2_movdqu (op0, op1));
9175 return;
9178 if (TARGET_SSE2 && mode == V2DFmode)
9180 m = adjust_address (op0, DFmode, 0);
9181 emit_insn (gen_sse2_storelpd (m, op1));
9182 m = adjust_address (op0, DFmode, 8);
9183 emit_insn (gen_sse2_storehpd (m, op1));
9185 else
9187 if (mode != V4SFmode)
9188 op1 = gen_lowpart (V4SFmode, op1);
9189 m = adjust_address (op0, V2SFmode, 0);
9190 emit_insn (gen_sse_storelps (m, op1));
9191 m = adjust_address (op0, V2SFmode, 8);
9192 emit_insn (gen_sse_storehps (m, op1));
9195 else
9196 gcc_unreachable ();
9199 /* Expand a push in MODE. This is some mode for which we do not support
9200 proper push instructions, at least from the registers that we expect
9201 the value to live in. */
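/* E.g. pushing an XFmode value becomes sp = sp - GET_MODE_SIZE (XFmode)
   followed by a move of X into (mem:XF (reg sp)). */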
9203 void
9204 ix86_expand_push (enum machine_mode mode, rtx x)
9206 rtx tmp;
9208 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9209 GEN_INT (-GET_MODE_SIZE (mode)),
9210 stack_pointer_rtx, 1, OPTAB_DIRECT);
9211 if (tmp != stack_pointer_rtx)
9212 emit_move_insn (stack_pointer_rtx, tmp);
9214 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9215 emit_move_insn (tmp, x);
9218 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9219 destination to use for the operation. If different from the true
9220 destination in operands[0], a copy operation will be required. */
9223 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9224 rtx operands[])
9226 int matching_memory;
9227 rtx src1, src2, dst;
9229 dst = operands[0];
9230 src1 = operands[1];
9231 src2 = operands[2];
9233 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9234 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9235 && (rtx_equal_p (dst, src2)
9236 || immediate_operand (src1, mode)))
9238 rtx temp = src1;
9239 src1 = src2;
9240 src2 = temp;
9243 /* If the destination is memory, and we do not have matching source
9244 operands, do things in registers. */
9245 matching_memory = 0;
9246 if (GET_CODE (dst) == MEM)
9248 if (rtx_equal_p (dst, src1))
9249 matching_memory = 1;
9250 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9251 && rtx_equal_p (dst, src2))
9252 matching_memory = 2;
9253 else
9254 dst = gen_reg_rtx (mode);
9257 /* Both source operands cannot be in memory. */
9258 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9260 if (matching_memory != 2)
9261 src2 = force_reg (mode, src2);
9262 else
9263 src1 = force_reg (mode, src1);
9266 /* If the operation is not commutable, source 1 cannot be a constant
9267 or non-matching memory. */
9268 if ((CONSTANT_P (src1)
9269 || (!matching_memory && GET_CODE (src1) == MEM))
9270 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9271 src1 = force_reg (mode, src1);
9273 src1 = operands[1] = src1;
9274 src2 = operands[2] = src2;
9275 return dst;
9278 /* Similarly, but assume that the destination has already been
9279 set up properly. */
9281 void
9282 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9283 enum machine_mode mode, rtx operands[])
9285 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9286 gcc_assert (dst == operands[0]);
9289 /* Attempt to expand a binary operator. Make the expansion closer to the
9290 actual machine than just general_operand, which would allow 3 separate
9291 memory references (one output, two inputs) in a single insn. */
9293 void
9294 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9295 rtx operands[])
9297 rtx src1, src2, dst, op, clob;
9299 dst = ix86_fixup_binary_operands (code, mode, operands);
9300 src1 = operands[1];
9301 src2 = operands[2];
9303 /* Emit the instruction. */
9305 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9306 if (reload_in_progress)
9308 /* Reload doesn't know about the flags register, and doesn't know that
9309 it doesn't want to clobber it. We can only do this with PLUS. */
9310 gcc_assert (code == PLUS);
9311 emit_insn (op);
9313 else
9315 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9316 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9319 /* Fix up the destination if needed. */
9320 if (dst != operands[0])
9321 emit_move_insn (operands[0], dst);
9324 /* Return TRUE or FALSE depending on whether the binary operator meets the
9325 appropriate constraints. */
9328 ix86_binary_operator_ok (enum rtx_code code,
9329 enum machine_mode mode ATTRIBUTE_UNUSED,
9330 rtx operands[3])
9332 /* Both source operands cannot be in memory. */
9333 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9334 return 0;
9335 /* If the operation is not commutable, source 1 cannot be a constant. */
9336 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9337 return 0;
9338 /* If the destination is memory, we must have a matching source operand. */
9339 if (GET_CODE (operands[0]) == MEM
9340 && ! (rtx_equal_p (operands[0], operands[1])
9341 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9342 && rtx_equal_p (operands[0], operands[2]))))
9343 return 0;
9344 /* If the operation is not commutable and the source 1 is memory, we must
9345 have a matching destination. */
9346 if (GET_CODE (operands[1]) == MEM
9347 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9348 && ! rtx_equal_p (operands[0], operands[1]))
9349 return 0;
9350 return 1;
9353 /* Attempt to expand a unary operator. Make the expansion closer to the
9354 actual machine than just general_operand, which would allow 2 separate
9355 memory references (one output, one input) in a single insn. */
9357 void
9358 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9359 rtx operands[])
9361 int matching_memory;
9362 rtx src, dst, op, clob;
9364 dst = operands[0];
9365 src = operands[1];
9367 /* If the destination is memory, and we do not have matching source
9368 operands, do things in registers. */
9369 matching_memory = 0;
9370 if (MEM_P (dst))
9372 if (rtx_equal_p (dst, src))
9373 matching_memory = 1;
9374 else
9375 dst = gen_reg_rtx (mode);
9378 /* When source operand is memory, destination must match. */
9379 if (MEM_P (src) && !matching_memory)
9380 src = force_reg (mode, src);
9382 /* Emit the instruction. */
9384 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9385 if (reload_in_progress || code == NOT)
9387 /* Reload doesn't know about the flags register, and doesn't know that
9388 it doesn't want to clobber it. */
9389 gcc_assert (code == NOT);
9390 emit_insn (op);
9392 else
9394 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9395 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9398 /* Fix up the destination if needed. */
9399 if (dst != operands[0])
9400 emit_move_insn (operands[0], dst);
9403 /* Return TRUE or FALSE depending on whether the unary operator meets the
9404 appropriate constraints. */
9406 int
9407 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9408 enum machine_mode mode ATTRIBUTE_UNUSED,
9409 rtx operands[2] ATTRIBUTE_UNUSED)
9411 /* If one of operands is memory, source and destination must match. */
9412 if ((GET_CODE (operands[0]) == MEM
9413 || GET_CODE (operands[1]) == MEM)
9414 && ! rtx_equal_p (operands[0], operands[1]))
9415 return FALSE;
9416 return TRUE;
9419 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9420 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9421 true, then replicate the mask for all elements of the vector register.
9422 If INVERT is true, then create a mask excluding the sign bit. */
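/* For illustration: the scalar mask is 0x80000000 for SFmode and
0x8000000000000000 for DFmode (the complement of these when INVERT is true).
Without VECT only the low vector element carries the mask and the remaining
elements are zero; with VECT the mask is replicated into every element of
the V4SF or V2DF constant. */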
9424 rtx
9425 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9427 enum machine_mode vec_mode;
9428 HOST_WIDE_INT hi, lo;
9429 int shift = 63;
9430 rtvec v;
9431 rtx mask;
9433 /* Find the sign bit, sign extended to 2*HWI. */
9434 if (mode == SFmode)
9435 lo = 0x80000000, hi = lo < 0;
9436 else if (HOST_BITS_PER_WIDE_INT >= 64)
9437 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9438 else
9439 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9441 if (invert)
9442 lo = ~lo, hi = ~hi;
9444 /* Force this value into the low part of a fp vector constant. */
9445 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9446 mask = gen_lowpart (mode, mask);
9448 if (mode == SFmode)
9450 if (vect)
9451 v = gen_rtvec (4, mask, mask, mask, mask);
9452 else
9453 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9454 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9455 vec_mode = V4SFmode;
9457 else
9459 if (vect)
9460 v = gen_rtvec (2, mask, mask);
9461 else
9462 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9463 vec_mode = V2DFmode;
9466 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9469 /* Generate code for floating point ABS or NEG. */
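/* Roughly, with SSE the operation becomes a single bitwise op against the
mask from ix86_build_signbit_mask:
neg: dst = src ^ signbit_mask (xorps / xorpd)
abs: dst = src & ~signbit_mask (andps / andpd)
Vector modes emit that XOR/AND directly below; the scalar SSE and x87 cases
keep the NEG/ABS rtx and leave it to later patterns and splitters. */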
9471 void
9472 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9473 rtx operands[])
9475 rtx mask, set, use, clob, dst, src;
9476 bool matching_memory;
9477 bool use_sse = false;
9478 bool vector_mode = VECTOR_MODE_P (mode);
9479 enum machine_mode elt_mode = mode;
9481 if (vector_mode)
9483 elt_mode = GET_MODE_INNER (mode);
9484 use_sse = true;
9486 else if (TARGET_SSE_MATH)
9487 use_sse = SSE_FLOAT_MODE_P (mode);
9489 /* NEG and ABS performed with SSE use bitwise mask operations.
9490 Create the appropriate mask now. */
9491 if (use_sse)
9492 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9493 else
9494 mask = NULL_RTX;
9496 dst = operands[0];
9497 src = operands[1];
9499 /* If the destination is memory, and we don't have matching source
9500 operands or we're using the x87, do things in registers. */
9501 matching_memory = false;
9502 if (MEM_P (dst))
9504 if (use_sse && rtx_equal_p (dst, src))
9505 matching_memory = true;
9506 else
9507 dst = gen_reg_rtx (mode);
9509 if (MEM_P (src) && !matching_memory)
9510 src = force_reg (mode, src);
9512 if (vector_mode)
9514 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9515 set = gen_rtx_SET (VOIDmode, dst, set);
9516 emit_insn (set);
9518 else
9520 set = gen_rtx_fmt_e (code, mode, src);
9521 set = gen_rtx_SET (VOIDmode, dst, set);
9522 if (mask)
9524 use = gen_rtx_USE (VOIDmode, mask);
9525 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9526 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9527 gen_rtvec (3, set, use, clob)));
9529 else
9530 emit_insn (set);
9533 if (dst != operands[0])
9534 emit_move_insn (operands[0], dst);
9537 /* Expand a copysign operation. Special case operand 0 being a constant. */
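/* Conceptually, with the masks built by ix86_build_signbit_mask,
copysign (a, b) = (a & ~signbit_mask) | (b & signbit_mask)
i.e. the magnitude of A combined with the sign bit of B; when A is a
constant the first AND is folded at expand time into the vector constant. */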
9539 void
9540 ix86_expand_copysign (rtx operands[])
9542 enum machine_mode mode, vmode;
9543 rtx dest, op0, op1, mask, nmask;
9545 dest = operands[0];
9546 op0 = operands[1];
9547 op1 = operands[2];
9549 mode = GET_MODE (dest);
9550 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9552 if (GET_CODE (op0) == CONST_DOUBLE)
9554 rtvec v;
9556 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9557 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9559 if (op0 == CONST0_RTX (mode))
9560 op0 = CONST0_RTX (vmode);
9561 else
9563 if (mode == SFmode)
9564 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9565 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9566 else
9567 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9568 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9571 mask = ix86_build_signbit_mask (mode, 0, 0);
9573 if (mode == SFmode)
9574 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9575 else
9576 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9578 else
9580 nmask = ix86_build_signbit_mask (mode, 0, 1);
9581 mask = ix86_build_signbit_mask (mode, 0, 0);
9583 if (mode == SFmode)
9584 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9585 else
9586 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9590 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9591 be a constant, and so has already been expanded into a vector constant. */
9593 void
9594 ix86_split_copysign_const (rtx operands[])
9596 enum machine_mode mode, vmode;
9597 rtx dest, op0, op1, mask, x;
9599 dest = operands[0];
9600 op0 = operands[1];
9601 op1 = operands[2];
9602 mask = operands[3];
9604 mode = GET_MODE (dest);
9605 vmode = GET_MODE (mask);
9607 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9608 x = gen_rtx_AND (vmode, dest, mask);
9609 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9611 if (op0 != CONST0_RTX (vmode))
9613 x = gen_rtx_IOR (vmode, dest, op0);
9614 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9618 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9619 so we have to do two masks. */
9621 void
9622 ix86_split_copysign_var (rtx operands[])
9624 enum machine_mode mode, vmode;
9625 rtx dest, scratch, op0, op1, mask, nmask, x;
9627 dest = operands[0];
9628 scratch = operands[1];
9629 op0 = operands[2];
9630 op1 = operands[3];
9631 nmask = operands[4];
9632 mask = operands[5];
9634 mode = GET_MODE (dest);
9635 vmode = GET_MODE (mask);
9637 if (rtx_equal_p (op0, op1))
9639 /* Shouldn't happen often (it's useless, obviously), but when it does
9640 we'd generate incorrect code if we continue below. */
9641 emit_move_insn (dest, op0);
9642 return;
9645 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9647 gcc_assert (REGNO (op1) == REGNO (scratch));
9649 x = gen_rtx_AND (vmode, scratch, mask);
9650 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9652 dest = mask;
9653 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9654 x = gen_rtx_NOT (vmode, dest);
9655 x = gen_rtx_AND (vmode, x, op0);
9656 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9658 else
9660 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9662 x = gen_rtx_AND (vmode, scratch, mask);
9664 else /* alternative 2,4 */
9666 gcc_assert (REGNO (mask) == REGNO (scratch));
9667 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9668 x = gen_rtx_AND (vmode, scratch, op1);
9670 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9672 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9674 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9675 x = gen_rtx_AND (vmode, dest, nmask);
9677 else /* alternative 3,4 */
9679 gcc_assert (REGNO (nmask) == REGNO (dest));
9680 dest = nmask;
9681 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9682 x = gen_rtx_AND (vmode, dest, op0);
9684 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9687 x = gen_rtx_IOR (vmode, dest, scratch);
9688 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9691 /* Return TRUE or FALSE depending on whether the first SET in INSN
9692 has source and destination with matching CC modes, and that the
9693 CC mode is at least as constrained as REQ_MODE. */
9695 int
9696 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9698 rtx set;
9699 enum machine_mode set_mode;
9701 set = PATTERN (insn);
9702 if (GET_CODE (set) == PARALLEL)
9703 set = XVECEXP (set, 0, 0);
9704 gcc_assert (GET_CODE (set) == SET);
9705 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9707 set_mode = GET_MODE (SET_DEST (set));
9708 switch (set_mode)
9710 case CCNOmode:
9711 if (req_mode != CCNOmode
9712 && (req_mode != CCmode
9713 || XEXP (SET_SRC (set), 1) != const0_rtx))
9714 return 0;
9715 break;
9716 case CCmode:
9717 if (req_mode == CCGCmode)
9718 return 0;
9719 /* FALLTHRU */
9720 case CCGCmode:
9721 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9722 return 0;
9723 /* FALLTHRU */
9724 case CCGOCmode:
9725 if (req_mode == CCZmode)
9726 return 0;
9727 /* FALLTHRU */
9728 case CCZmode:
9729 break;
9731 default:
9732 gcc_unreachable ();
9735 return (GET_MODE (SET_SRC (set)) == set_mode);
9738 /* Generate insn patterns to do an integer compare of OPERANDS. */
9740 static rtx
9741 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9743 enum machine_mode cmpmode;
9744 rtx tmp, flags;
9746 cmpmode = SELECT_CC_MODE (code, op0, op1);
9747 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9749 /* This is very simple, but making the interface the same as in the
9750 FP case makes the rest of the code easier. */
9751 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9752 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9754 /* Return the test that should be put into the flags user, i.e.
9755 the bcc, scc, or cmov instruction. */
9756 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9759 /* Figure out whether to use ordered or unordered fp comparisons.
9760 Return the appropriate mode to use. */
9762 enum machine_mode
9763 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9765 /* ??? In order to make all comparisons reversible, we do all comparisons
9766 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9767 all forms of trapping and nontrapping comparisons, we can make inequality
9768 comparisons trapping again, since it results in better code when using
9769 FCOM based compares. */
9770 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9773 enum machine_mode
9774 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9776 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9777 return ix86_fp_compare_mode (code);
9778 switch (code)
9780 /* Only zero flag is needed. */
9781 case EQ: /* ZF=0 */
9782 case NE: /* ZF!=0 */
9783 return CCZmode;
9784 /* Codes needing carry flag. */
9785 case GEU: /* CF=0 */
9786 case GTU: /* CF=0 & ZF=0 */
9787 case LTU: /* CF=1 */
9788 case LEU: /* CF=1 | ZF=1 */
9789 return CCmode;
9790 /* Codes possibly doable only with sign flag when
9791 comparing against zero. */
9792 case GE: /* SF=OF or SF=0 */
9793 case LT: /* SF<>OF or SF=1 */
9794 if (op1 == const0_rtx)
9795 return CCGOCmode;
9796 else
9797 /* For other cases Carry flag is not required. */
9798 return CCGCmode;
9799 /* Codes doable only with sign flag when comparing
9800 against zero, but we miss jump instruction for it
9801 so we need to use relational tests against overflow
9802 that thus needs to be zero. */
9803 case GT: /* ZF=0 & SF=OF */
9804 case LE: /* ZF=1 | SF<>OF */
9805 if (op1 == const0_rtx)
9806 return CCNOmode;
9807 else
9808 return CCGCmode;
9809 /* The strcmp pattern does (use flags), and combine may ask us for the proper
9810 mode. */
9811 case USE:
9812 return CCmode;
9813 default:
9814 gcc_unreachable ();
9818 /* Return the fixed registers used for condition codes. */
9820 static bool
9821 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9823 *p1 = FLAGS_REG;
9824 *p2 = FPSR_REG;
9825 return true;
9828 /* If two condition code modes are compatible, return a condition code
9829 mode which is compatible with both. Otherwise, return
9830 VOIDmode. */
9832 static enum machine_mode
9833 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9835 if (m1 == m2)
9836 return m1;
9838 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9839 return VOIDmode;
9841 if ((m1 == CCGCmode && m2 == CCGOCmode)
9842 || (m1 == CCGOCmode && m2 == CCGCmode))
9843 return CCGCmode;
9845 switch (m1)
9847 default:
9848 gcc_unreachable ();
9850 case CCmode:
9851 case CCGCmode:
9852 case CCGOCmode:
9853 case CCNOmode:
9854 case CCZmode:
9855 switch (m2)
9857 default:
9858 return VOIDmode;
9860 case CCmode:
9861 case CCGCmode:
9862 case CCGOCmode:
9863 case CCNOmode:
9864 case CCZmode:
9865 return CCmode;
9868 case CCFPmode:
9869 case CCFPUmode:
9870 /* These are only compatible with themselves, which we already
9871 checked above. */
9872 return VOIDmode;
9876 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9878 int
9879 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9881 enum rtx_code swapped_code = swap_condition (code);
9882 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9883 || (ix86_fp_comparison_cost (swapped_code)
9884 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9887 /* Swap, force into registers, or otherwise massage the two operands
9888 to a fp comparison. The operands are updated in place; the new
9889 comparison code is returned. */
9891 static enum rtx_code
9892 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9894 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9895 rtx op0 = *pop0, op1 = *pop1;
9896 enum machine_mode op_mode = GET_MODE (op0);
9897 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9899 /* All of the unordered compare instructions only work on registers.
9900 The same is true of the fcomi compare instructions. The XFmode
9901 compare instructions require registers except when comparing
9902 against zero or when converting operand 1 from fixed point to
9903 floating point. */
9905 if (!is_sse
9906 && (fpcmp_mode == CCFPUmode
9907 || (op_mode == XFmode
9908 && ! (standard_80387_constant_p (op0) == 1
9909 || standard_80387_constant_p (op1) == 1)
9910 && GET_CODE (op1) != FLOAT)
9911 || ix86_use_fcomi_compare (code)))
9913 op0 = force_reg (op_mode, op0);
9914 op1 = force_reg (op_mode, op1);
9916 else
9918 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9919 things around if they appear profitable, otherwise force op0
9920 into a register. */
9922 if (standard_80387_constant_p (op0) == 0
9923 || (GET_CODE (op0) == MEM
9924 && ! (standard_80387_constant_p (op1) == 0
9925 || GET_CODE (op1) == MEM)))
9927 rtx tmp;
9928 tmp = op0, op0 = op1, op1 = tmp;
9929 code = swap_condition (code);
9932 if (GET_CODE (op0) != REG)
9933 op0 = force_reg (op_mode, op0);
9935 if (CONSTANT_P (op1))
9937 int tmp = standard_80387_constant_p (op1);
9938 if (tmp == 0)
9939 op1 = validize_mem (force_const_mem (op_mode, op1));
9940 else if (tmp == 1)
9942 if (TARGET_CMOVE)
9943 op1 = force_reg (op_mode, op1);
9945 else
9946 op1 = force_reg (op_mode, op1);
9950 /* Try to rearrange the comparison to make it cheaper. */
9951 if (ix86_fp_comparison_cost (code)
9952 > ix86_fp_comparison_cost (swap_condition (code))
9953 && (GET_CODE (op1) == REG || !no_new_pseudos))
9955 rtx tmp;
9956 tmp = op0, op0 = op1, op1 = tmp;
9957 code = swap_condition (code);
9958 if (GET_CODE (op0) != REG)
9959 op0 = force_reg (op_mode, op0);
9962 *pop0 = op0;
9963 *pop1 = op1;
9964 return code;
9967 /* Convert comparison codes we use to represent FP comparison to integer
9968 code that will result in proper branch. Return UNKNOWN if no such code
9969 is available. */
9971 enum rtx_code
9972 ix86_fp_compare_code_to_integer (enum rtx_code code)
9974 switch (code)
9976 case GT:
9977 return GTU;
9978 case GE:
9979 return GEU;
9980 case ORDERED:
9981 case UNORDERED:
9982 return code;
9983 break;
9984 case UNEQ:
9985 return EQ;
9986 break;
9987 case UNLT:
9988 return LTU;
9989 break;
9990 case UNLE:
9991 return LEU;
9992 break;
9993 case LTGT:
9994 return NE;
9995 break;
9996 default:
9997 return UNKNOWN;
10001 /* Split comparison code CODE into comparisons we can do using branch
10002 instructions. BYPASS_CODE is the comparison code for a branch that will
10003 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10004 is not required, its value is set to UNKNOWN.
10005 We never require more than two branches. */
10007 void
10008 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10009 enum rtx_code *first_code,
10010 enum rtx_code *second_code)
10012 *first_code = code;
10013 *bypass_code = UNKNOWN;
10014 *second_code = UNKNOWN;
10016 /* The fcomi comparison sets flags as follows:
10018 cmp ZF PF CF
10019 > 0 0 0
10020 < 0 0 1
10021 = 1 0 0
10022 un 1 1 1 */
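/* For example, a plain LT fails on unordered operands under IEEE math, so
it is split below into FIRST_CODE = UNLT guarded by BYPASS_CODE = UNORDERED;
the branch sequence is roughly
jp bypass ; PF=1: unordered, skip the test
jb target ; CF=1: less than
bypass:
When !TARGET_IEEE_FP the bypass and second tests are dropped again. */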
10024 switch (code)
10026 case GT: /* GTU - CF=0 & ZF=0 */
10027 case GE: /* GEU - CF=0 */
10028 case ORDERED: /* PF=0 */
10029 case UNORDERED: /* PF=1 */
10030 case UNEQ: /* EQ - ZF=1 */
10031 case UNLT: /* LTU - CF=1 */
10032 case UNLE: /* LEU - CF=1 | ZF=1 */
10033 case LTGT: /* EQ - ZF=0 */
10034 break;
10035 case LT: /* LTU - CF=1 - fails on unordered */
10036 *first_code = UNLT;
10037 *bypass_code = UNORDERED;
10038 break;
10039 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10040 *first_code = UNLE;
10041 *bypass_code = UNORDERED;
10042 break;
10043 case EQ: /* EQ - ZF=1 - fails on unordered */
10044 *first_code = UNEQ;
10045 *bypass_code = UNORDERED;
10046 break;
10047 case NE: /* NE - ZF=0 - fails on unordered */
10048 *first_code = LTGT;
10049 *second_code = UNORDERED;
10050 break;
10051 case UNGE: /* GEU - CF=0 - fails on unordered */
10052 *first_code = GE;
10053 *second_code = UNORDERED;
10054 break;
10055 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10056 *first_code = GT;
10057 *second_code = UNORDERED;
10058 break;
10059 default:
10060 gcc_unreachable ();
10062 if (!TARGET_IEEE_FP)
10064 *second_code = UNKNOWN;
10065 *bypass_code = UNKNOWN;
10069 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10070 All following functions use the number of instructions as a cost metric.
10071 In the future this should be tweaked to compute bytes for optimize_size and
10072 take into account the performance of various instructions on various CPUs. */
10073 static int
10074 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10076 if (!TARGET_IEEE_FP)
10077 return 4;
10078 /* The cost of code output by ix86_expand_fp_compare. */
10079 switch (code)
10081 case UNLE:
10082 case UNLT:
10083 case LTGT:
10084 case GT:
10085 case GE:
10086 case UNORDERED:
10087 case ORDERED:
10088 case UNEQ:
10089 return 4;
10090 break;
10091 case LT:
10092 case NE:
10093 case EQ:
10094 case UNGE:
10095 return 5;
10096 break;
10097 case LE:
10098 case UNGT:
10099 return 6;
10100 break;
10101 default:
10102 gcc_unreachable ();
10106 /* Return cost of comparison done using fcomi operation.
10107 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10108 static int
10109 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10111 enum rtx_code bypass_code, first_code, second_code;
10112 /* Return an arbitrarily high cost when the instruction is not supported - this
10113 prevents gcc from using it. */
10114 if (!TARGET_CMOVE)
10115 return 1024;
10116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10117 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10120 /* Return cost of comparison done using sahf operation.
10121 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10122 static int
10123 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10125 enum rtx_code bypass_code, first_code, second_code;
10126 /* Return an arbitrarily high cost when the instruction is not preferred - this
10127 prevents gcc from using it. */
10128 if (!TARGET_USE_SAHF && !optimize_size)
10129 return 1024;
10130 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10131 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10134 /* Compute cost of the comparison done using any method.
10135 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10136 static int
10137 ix86_fp_comparison_cost (enum rtx_code code)
10139 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10140 int min;
10142 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10143 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10145 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10146 if (min > sahf_cost)
10147 min = sahf_cost;
10148 if (min > fcomi_cost)
10149 min = fcomi_cost;
10150 return min;
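/* For example, under TARGET_IEEE_FP an EQ compare costs 5 by the arithmetic
(fnstsw + test) route, 4 via sahf and 3 via fcomi (2 plus one extra jump for
the UNORDERED bypass), so ix86_expand_fp_compare below prefers the fcomi form
when TARGET_CMOVE is set and the caller accepts the extra test. */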
10153 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10155 static rtx
10156 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10157 rtx *second_test, rtx *bypass_test)
10159 enum machine_mode fpcmp_mode, intcmp_mode;
10160 rtx tmp, tmp2;
10161 int cost = ix86_fp_comparison_cost (code);
10162 enum rtx_code bypass_code, first_code, second_code;
10164 fpcmp_mode = ix86_fp_compare_mode (code);
10165 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10167 if (second_test)
10168 *second_test = NULL_RTX;
10169 if (bypass_test)
10170 *bypass_test = NULL_RTX;
10172 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10174 /* Do fcomi/sahf based test when profitable. */
10175 if ((bypass_code == UNKNOWN || bypass_test)
10176 && (second_code == UNKNOWN || second_test)
10177 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10179 if (TARGET_CMOVE)
10181 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10182 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10183 tmp);
10184 emit_insn (tmp);
10186 else
10188 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10189 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10190 if (!scratch)
10191 scratch = gen_reg_rtx (HImode);
10192 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10193 emit_insn (gen_x86_sahf_1 (scratch));
10196 /* The FP codes work out to act like unsigned. */
10197 intcmp_mode = fpcmp_mode;
10198 code = first_code;
10199 if (bypass_code != UNKNOWN)
10200 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10201 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10202 const0_rtx);
10203 if (second_code != UNKNOWN)
10204 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10205 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10206 const0_rtx);
10208 else
10210 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10211 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10212 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10213 if (!scratch)
10214 scratch = gen_reg_rtx (HImode);
10215 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10217 /* In the unordered case, we have to check C2 for NaN's, which
10218 doesn't happen to work out to anything nice combination-wise.
10219 So do some bit twiddling on the value we've got in AH to come
10220 up with an appropriate set of condition codes. */
10222 intcmp_mode = CCNOmode;
10223 switch (code)
10225 case GT:
10226 case UNGT:
10227 if (code == GT || !TARGET_IEEE_FP)
10229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10230 code = EQ;
10232 else
10234 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10235 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10236 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10237 intcmp_mode = CCmode;
10238 code = GEU;
10240 break;
10241 case LT:
10242 case UNLT:
10243 if (code == LT && TARGET_IEEE_FP)
10245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10247 intcmp_mode = CCmode;
10248 code = EQ;
10250 else
10252 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10253 code = NE;
10255 break;
10256 case GE:
10257 case UNGE:
10258 if (code == GE || !TARGET_IEEE_FP)
10260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10261 code = EQ;
10263 else
10265 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10266 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10267 GEN_INT (0x01)));
10268 code = NE;
10270 break;
10271 case LE:
10272 case UNLE:
10273 if (code == LE && TARGET_IEEE_FP)
10275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10276 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10277 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10278 intcmp_mode = CCmode;
10279 code = LTU;
10281 else
10283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10284 code = NE;
10286 break;
10287 case EQ:
10288 case UNEQ:
10289 if (code == EQ && TARGET_IEEE_FP)
10291 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10292 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10293 intcmp_mode = CCmode;
10294 code = EQ;
10296 else
10298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10299 code = NE;
10300 break;
10302 break;
10303 case NE:
10304 case LTGT:
10305 if (code == NE && TARGET_IEEE_FP)
10307 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10308 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10309 GEN_INT (0x40)));
10310 code = NE;
10312 else
10314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10315 code = EQ;
10317 break;
10319 case UNORDERED:
10320 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10321 code = NE;
10322 break;
10323 case ORDERED:
10324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10325 code = EQ;
10326 break;
10328 default:
10329 gcc_unreachable ();
10333 /* Return the test that should be put into the flags user, i.e.
10334 the bcc, scc, or cmov instruction. */
10335 return gen_rtx_fmt_ee (code, VOIDmode,
10336 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10337 const0_rtx);
10340 rtx
10341 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10343 rtx op0, op1, ret;
10344 op0 = ix86_compare_op0;
10345 op1 = ix86_compare_op1;
10347 if (second_test)
10348 *second_test = NULL_RTX;
10349 if (bypass_test)
10350 *bypass_test = NULL_RTX;
10352 if (ix86_compare_emitted)
10354 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10355 ix86_compare_emitted = NULL_RTX;
10357 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10358 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10359 second_test, bypass_test);
10360 else
10361 ret = ix86_expand_int_compare (code, op0, op1);
10363 return ret;
10366 /* Return true if the CODE will result in nontrivial jump sequence. */
10367 bool
10368 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10370 enum rtx_code bypass_code, first_code, second_code;
10371 if (!TARGET_CMOVE)
10372 return true;
10373 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10374 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10377 void
10378 ix86_expand_branch (enum rtx_code code, rtx label)
10380 rtx tmp;
10382 /* If we have emitted a compare insn, go straight to simple.
10383 ix86_expand_compare won't emit anything if ix86_compare_emitted
10384 is non NULL. */
10385 if (ix86_compare_emitted)
10386 goto simple;
10388 switch (GET_MODE (ix86_compare_op0))
10390 case QImode:
10391 case HImode:
10392 case SImode:
10393 simple:
10394 tmp = ix86_expand_compare (code, NULL, NULL);
10395 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10396 gen_rtx_LABEL_REF (VOIDmode, label),
10397 pc_rtx);
10398 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10399 return;
10401 case SFmode:
10402 case DFmode:
10403 case XFmode:
10405 rtvec vec;
10406 int use_fcomi;
10407 enum rtx_code bypass_code, first_code, second_code;
10409 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10410 &ix86_compare_op1);
10412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10414 /* Check whether we will use the natural sequence with one jump. If
10415 so, we can expand the jump early. Otherwise delay expansion by
10416 creating a compound insn so as not to confuse the optimizers. */
10417 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10418 && TARGET_CMOVE)
10420 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10421 gen_rtx_LABEL_REF (VOIDmode, label),
10422 pc_rtx, NULL_RTX, NULL_RTX);
10424 else
10426 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10427 ix86_compare_op0, ix86_compare_op1);
10428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10429 gen_rtx_LABEL_REF (VOIDmode, label),
10430 pc_rtx);
10431 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10433 use_fcomi = ix86_use_fcomi_compare (code);
10434 vec = rtvec_alloc (3 + !use_fcomi);
10435 RTVEC_ELT (vec, 0) = tmp;
10436 RTVEC_ELT (vec, 1)
10437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10438 RTVEC_ELT (vec, 2)
10439 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10440 if (! use_fcomi)
10441 RTVEC_ELT (vec, 3)
10442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10444 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10446 return;
10449 case DImode:
10450 if (TARGET_64BIT)
10451 goto simple;
10452 case TImode:
10453 /* Expand DImode branch into multiple compare+branch. */
10455 rtx lo[2], hi[2], label2;
10456 enum rtx_code code1, code2, code3;
10457 enum machine_mode submode;
10459 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10461 tmp = ix86_compare_op0;
10462 ix86_compare_op0 = ix86_compare_op1;
10463 ix86_compare_op1 = tmp;
10464 code = swap_condition (code);
10466 if (GET_MODE (ix86_compare_op0) == DImode)
10468 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10469 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10470 submode = SImode;
10472 else
10474 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10475 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10476 submode = DImode;
10479 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10480 avoid two branches. This costs one extra insn, so disable when
10481 optimizing for size. */
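/* E.g. for a 64-bit "a == b" on ia32 this emits roughly
t = (a.hi ^ b.hi) | (a.lo ^ b.lo); if (t == 0) goto label;
(each XOR is skipped when the corresponding half of the constant is zero),
so only one conditional branch is needed. */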
10483 if ((code == EQ || code == NE)
10484 && (!optimize_size
10485 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10487 rtx xor0, xor1;
10489 xor1 = hi[0];
10490 if (hi[1] != const0_rtx)
10491 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10492 NULL_RTX, 0, OPTAB_WIDEN);
10494 xor0 = lo[0];
10495 if (lo[1] != const0_rtx)
10496 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10497 NULL_RTX, 0, OPTAB_WIDEN);
10499 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10500 NULL_RTX, 0, OPTAB_WIDEN);
10502 ix86_compare_op0 = tmp;
10503 ix86_compare_op1 = const0_rtx;
10504 ix86_expand_branch (code, label);
10505 return;
10508 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10509 op1 is a constant and the low word is zero, then we can just
10510 examine the high word. */
10512 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10513 switch (code)
10515 case LT: case LTU: case GE: case GEU:
10516 ix86_compare_op0 = hi[0];
10517 ix86_compare_op1 = hi[1];
10518 ix86_expand_branch (code, label);
10519 return;
10520 default:
10521 break;
10524 /* Otherwise, we need two or three jumps. */
10526 label2 = gen_label_rtx ();
10528 code1 = code;
10529 code2 = swap_condition (code);
10530 code3 = unsigned_condition (code);
10532 switch (code)
10534 case LT: case GT: case LTU: case GTU:
10535 break;
10537 case LE: code1 = LT; code2 = GT; break;
10538 case GE: code1 = GT; code2 = LT; break;
10539 case LEU: code1 = LTU; code2 = GTU; break;
10540 case GEU: code1 = GTU; code2 = LTU; break;
10542 case EQ: code1 = UNKNOWN; code2 = NE; break;
10543 case NE: code2 = UNKNOWN; break;
10545 default:
10546 gcc_unreachable ();
10550 * a < b =>
10551 * if (hi(a) < hi(b)) goto true;
10552 * if (hi(a) > hi(b)) goto false;
10553 * if (lo(a) < lo(b)) goto true;
10554 * false:
10557 ix86_compare_op0 = hi[0];
10558 ix86_compare_op1 = hi[1];
10560 if (code1 != UNKNOWN)
10561 ix86_expand_branch (code1, label);
10562 if (code2 != UNKNOWN)
10563 ix86_expand_branch (code2, label2);
10565 ix86_compare_op0 = lo[0];
10566 ix86_compare_op1 = lo[1];
10567 ix86_expand_branch (code3, label);
10569 if (code2 != UNKNOWN)
10570 emit_label (label2);
10571 return;
10574 default:
10575 gcc_unreachable ();
10579 /* Split branch based on floating point condition. */
10580 void
10581 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10582 rtx target1, rtx target2, rtx tmp, rtx pushed)
10584 rtx second, bypass;
10585 rtx label = NULL_RTX;
10586 rtx condition;
10587 int bypass_probability = -1, second_probability = -1, probability = -1;
10588 rtx i;
10590 if (target2 != pc_rtx)
10592 rtx tmp = target2;
10593 code = reverse_condition_maybe_unordered (code);
10594 target2 = target1;
10595 target1 = tmp;
10598 condition = ix86_expand_fp_compare (code, op1, op2,
10599 tmp, &second, &bypass);
10601 /* Remove pushed operand from stack. */
10602 if (pushed)
10603 ix86_free_from_memory (GET_MODE (pushed));
10605 if (split_branch_probability >= 0)
10607 /* Distribute the probabilities across the jumps.
10608 Assume that BYPASS and SECOND always test
10609 for UNORDERED.
10610 probability = split_branch_probability;
10612 /* A value of 1 is low enough that the probability does not need
10613 to be updated. Later we may run some experiments and see
10614 if unordered values are more frequent in practice. */
10615 if (bypass)
10616 bypass_probability = 1;
10617 if (second)
10618 second_probability = 1;
10620 if (bypass != NULL_RTX)
10622 label = gen_label_rtx ();
10623 i = emit_jump_insn (gen_rtx_SET
10624 (VOIDmode, pc_rtx,
10625 gen_rtx_IF_THEN_ELSE (VOIDmode,
10626 bypass,
10627 gen_rtx_LABEL_REF (VOIDmode,
10628 label),
10629 pc_rtx)));
10630 if (bypass_probability >= 0)
10631 REG_NOTES (i)
10632 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10633 GEN_INT (bypass_probability),
10634 REG_NOTES (i));
10636 i = emit_jump_insn (gen_rtx_SET
10637 (VOIDmode, pc_rtx,
10638 gen_rtx_IF_THEN_ELSE (VOIDmode,
10639 condition, target1, target2)));
10640 if (probability >= 0)
10641 REG_NOTES (i)
10642 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10643 GEN_INT (probability),
10644 REG_NOTES (i));
10645 if (second != NULL_RTX)
10647 i = emit_jump_insn (gen_rtx_SET
10648 (VOIDmode, pc_rtx,
10649 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10650 target2)));
10651 if (second_probability >= 0)
10652 REG_NOTES (i)
10653 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10654 GEN_INT (second_probability),
10655 REG_NOTES (i));
10657 if (label != NULL_RTX)
10658 emit_label (label);
10661 int
10662 ix86_expand_setcc (enum rtx_code code, rtx dest)
10664 rtx ret, tmp, tmpreg, equiv;
10665 rtx second_test, bypass_test;
10667 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10668 return 0; /* FAIL */
10670 gcc_assert (GET_MODE (dest) == QImode);
10672 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10673 PUT_MODE (ret, QImode);
10675 tmp = dest;
10676 tmpreg = dest;
10678 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10679 if (bypass_test || second_test)
10681 rtx test = second_test;
10682 int bypass = 0;
10683 rtx tmp2 = gen_reg_rtx (QImode);
10684 if (bypass_test)
10686 gcc_assert (!second_test);
10687 test = bypass_test;
10688 bypass = 1;
10689 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10691 PUT_MODE (test, QImode);
10692 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10694 if (bypass)
10695 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10696 else
10697 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10700 /* Attach a REG_EQUAL note describing the comparison result. */
10701 if (ix86_compare_op0 && ix86_compare_op1)
10703 equiv = simplify_gen_relational (code, QImode,
10704 GET_MODE (ix86_compare_op0),
10705 ix86_compare_op0, ix86_compare_op1);
10706 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10709 return 1; /* DONE */
10712 /* Expand comparison setting or clearing carry flag. Return true when
10713 successful and set pop for the operation. */
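/* For illustration, the integer cases below rewrite a compare so that only
the carry flag is needed, e.g.
a == 0 -> (unsigned) a < 1
(unsigned) a > 7 -> (unsigned) a >= 8 (constant incremented)
a >= 0 -> (unsigned) a < 0x80000000
which the sbb-based sequences in ix86_expand_int_movcc then consume. */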
10714 static bool
10715 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10717 enum machine_mode mode =
10718 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10720 /* Do not handle DImode compares that go through a special path. Also we can't
10721 deal with FP compares yet. It would be possible to add this. */
10722 if (mode == (TARGET_64BIT ? TImode : DImode))
10723 return false;
10724 if (FLOAT_MODE_P (mode))
10726 rtx second_test = NULL, bypass_test = NULL;
10727 rtx compare_op, compare_seq;
10729 /* Shortcut: the following common codes never translate into carry flag compares. */
10730 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10731 || code == ORDERED || code == UNORDERED)
10732 return false;
10734 /* These comparisons require zero flag; swap operands so they won't. */
10735 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10736 && !TARGET_IEEE_FP)
10738 rtx tmp = op0;
10739 op0 = op1;
10740 op1 = tmp;
10741 code = swap_condition (code);
10744 /* Try to expand the comparison and verify that we end up with a carry flag
10745 based comparison. This fails to be true only when we decide to expand the
10746 comparison using arithmetic, which is not a common scenario. */
10747 start_sequence ();
10748 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10749 &second_test, &bypass_test);
10750 compare_seq = get_insns ();
10751 end_sequence ();
10753 if (second_test || bypass_test)
10754 return false;
10755 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10756 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10757 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10758 else
10759 code = GET_CODE (compare_op);
10760 if (code != LTU && code != GEU)
10761 return false;
10762 emit_insn (compare_seq);
10763 *pop = compare_op;
10764 return true;
10766 if (!INTEGRAL_MODE_P (mode))
10767 return false;
10768 switch (code)
10770 case LTU:
10771 case GEU:
10772 break;
10774 /* Convert a==0 into (unsigned)a<1. */
10775 case EQ:
10776 case NE:
10777 if (op1 != const0_rtx)
10778 return false;
10779 op1 = const1_rtx;
10780 code = (code == EQ ? LTU : GEU);
10781 break;
10783 /* Convert a>b into b<a or a>=b+1. */
10784 case GTU:
10785 case LEU:
10786 if (GET_CODE (op1) == CONST_INT)
10788 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10789 /* Bail out on overflow. We could still swap the operands, but that
10790 would force the constant to be loaded into a register. */
10791 if (op1 == const0_rtx
10792 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10793 return false;
10794 code = (code == GTU ? GEU : LTU);
10796 else
10798 rtx tmp = op1;
10799 op1 = op0;
10800 op0 = tmp;
10801 code = (code == GTU ? LTU : GEU);
10803 break;
10805 /* Convert a>=0 into (unsigned)a<0x80000000. */
10806 case LT:
10807 case GE:
10808 if (mode == DImode || op1 != const0_rtx)
10809 return false;
10810 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10811 code = (code == LT ? GEU : LTU);
10812 break;
10813 case LE:
10814 case GT:
10815 if (mode == DImode || op1 != constm1_rtx)
10816 return false;
10817 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10818 code = (code == LE ? GEU : LTU);
10819 break;
10821 default:
10822 return false;
10824 /* Swapping operands may cause the constant to appear as the first operand. */
10825 if (!nonimmediate_operand (op0, VOIDmode))
10827 if (no_new_pseudos)
10828 return false;
10829 op0 = force_reg (mode, op0);
10831 ix86_compare_op0 = op0;
10832 ix86_compare_op1 = op1;
10833 *pop = ix86_expand_compare (code, NULL, NULL);
10834 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10835 return true;
10838 int
10839 ix86_expand_int_movcc (rtx operands[])
10841 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10842 rtx compare_seq, compare_op;
10843 rtx second_test, bypass_test;
10844 enum machine_mode mode = GET_MODE (operands[0]);
10845 bool sign_bit_compare_p = false;
10847 start_sequence ();
10848 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10849 compare_seq = get_insns ();
10850 end_sequence ();
10852 compare_code = GET_CODE (compare_op);
10854 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10855 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10856 sign_bit_compare_p = true;
10858 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10859 HImode insns, we'd be swallowed in word prefix ops. */
10861 if ((mode != HImode || TARGET_FAST_PREFIX)
10862 && (mode != (TARGET_64BIT ? TImode : DImode))
10863 && GET_CODE (operands[2]) == CONST_INT
10864 && GET_CODE (operands[3]) == CONST_INT)
10866 rtx out = operands[0];
10867 HOST_WIDE_INT ct = INTVAL (operands[2]);
10868 HOST_WIDE_INT cf = INTVAL (operands[3]);
10869 HOST_WIDE_INT diff;
10871 diff = ct - cf;
10872 /* Sign bit compares are better done using shifts than by using
10873 sbb. */
10874 if (sign_bit_compare_p
10875 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10876 ix86_compare_op1, &compare_op))
10878 /* Detect overlap between destination and compare sources. */
10879 rtx tmp = out;
10881 if (!sign_bit_compare_p)
10883 bool fpcmp = false;
10885 compare_code = GET_CODE (compare_op);
10887 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10888 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10890 fpcmp = true;
10891 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10894 /* To simplify rest of code, restrict to the GEU case. */
10895 if (compare_code == LTU)
10897 HOST_WIDE_INT tmp = ct;
10898 ct = cf;
10899 cf = tmp;
10900 compare_code = reverse_condition (compare_code);
10901 code = reverse_condition (code);
10903 else
10905 if (fpcmp)
10906 PUT_CODE (compare_op,
10907 reverse_condition_maybe_unordered
10908 (GET_CODE (compare_op)));
10909 else
10910 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10912 diff = ct - cf;
10914 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10915 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10916 tmp = gen_reg_rtx (mode);
10918 if (mode == DImode)
10919 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10920 else
10921 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10923 else
10925 if (code == GT || code == GE)
10926 code = reverse_condition (code);
10927 else
10929 HOST_WIDE_INT tmp = ct;
10930 ct = cf;
10931 cf = tmp;
10932 diff = ct - cf;
10934 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10935 ix86_compare_op1, VOIDmode, 0, -1);
10938 if (diff == 1)
10941 * cmpl op0,op1
10942 * sbbl dest,dest
10943 * [addl dest, ct]
10945 * Size 5 - 8.
10947 if (ct)
10948 tmp = expand_simple_binop (mode, PLUS,
10949 tmp, GEN_INT (ct),
10950 copy_rtx (tmp), 1, OPTAB_DIRECT);
10952 else if (cf == -1)
10955 * cmpl op0,op1
10956 * sbbl dest,dest
10957 * orl $ct, dest
10959 * Size 8.
10961 tmp = expand_simple_binop (mode, IOR,
10962 tmp, GEN_INT (ct),
10963 copy_rtx (tmp), 1, OPTAB_DIRECT);
10965 else if (diff == -1 && ct)
10968 * cmpl op0,op1
10969 * sbbl dest,dest
10970 * notl dest
10971 * [addl dest, cf]
10973 * Size 8 - 11.
10975 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10976 if (cf)
10977 tmp = expand_simple_binop (mode, PLUS,
10978 copy_rtx (tmp), GEN_INT (cf),
10979 copy_rtx (tmp), 1, OPTAB_DIRECT);
10981 else
10984 * cmpl op0,op1
10985 * sbbl dest,dest
10986 * [notl dest]
10987 * andl cf - ct, dest
10988 * [addl dest, ct]
10990 * Size 8 - 11.
10993 if (cf == 0)
10995 cf = ct;
10996 ct = 0;
10997 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11000 tmp = expand_simple_binop (mode, AND,
11001 copy_rtx (tmp),
11002 gen_int_mode (cf - ct, mode),
11003 copy_rtx (tmp), 1, OPTAB_DIRECT);
11004 if (ct)
11005 tmp = expand_simple_binop (mode, PLUS,
11006 copy_rtx (tmp), GEN_INT (ct),
11007 copy_rtx (tmp), 1, OPTAB_DIRECT);
11010 if (!rtx_equal_p (tmp, out))
11011 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11013 return 1; /* DONE */
11016 if (diff < 0)
11018 HOST_WIDE_INT tmp;
11019 tmp = ct, ct = cf, cf = tmp;
11020 diff = -diff;
11021 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11023 /* We may be reversing an unordered compare to a normal compare, which
11024 is not valid in general (we may convert a non-trapping condition
11025 to a trapping one); however, on i386 we currently emit all
11026 comparisons unordered. */
11027 compare_code = reverse_condition_maybe_unordered (compare_code);
11028 code = reverse_condition_maybe_unordered (code);
11030 else
11032 compare_code = reverse_condition (compare_code);
11033 code = reverse_condition (code);
11037 compare_code = UNKNOWN;
11038 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11039 && GET_CODE (ix86_compare_op1) == CONST_INT)
11041 if (ix86_compare_op1 == const0_rtx
11042 && (code == LT || code == GE))
11043 compare_code = code;
11044 else if (ix86_compare_op1 == constm1_rtx)
11046 if (code == LE)
11047 compare_code = LT;
11048 else if (code == GT)
11049 compare_code = GE;
11053 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11054 if (compare_code != UNKNOWN
11055 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11056 && (cf == -1 || ct == -1))
11058 /* If lea code below could be used, only optimize
11059 if it results in a 2 insn sequence. */
11061 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11062 || diff == 3 || diff == 5 || diff == 9)
11063 || (compare_code == LT && ct == -1)
11064 || (compare_code == GE && cf == -1))
11067 * notl op1 (if necessary)
11068 * sarl $31, op1
11069 * orl cf, op1
11071 if (ct != -1)
11073 cf = ct;
11074 ct = -1;
11075 code = reverse_condition (code);
11078 out = emit_store_flag (out, code, ix86_compare_op0,
11079 ix86_compare_op1, VOIDmode, 0, -1);
11081 out = expand_simple_binop (mode, IOR,
11082 out, GEN_INT (cf),
11083 out, 1, OPTAB_DIRECT);
11084 if (out != operands[0])
11085 emit_move_insn (operands[0], out);
11087 return 1; /* DONE */
11092 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11093 || diff == 3 || diff == 5 || diff == 9)
11094 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11095 && (mode != DImode
11096 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11099 * xorl dest,dest
11100 * cmpl op1,op2
11101 * setcc dest
11102 * lea cf(dest*(ct-cf)),dest
11104 * Size 14.
11106 * This also catches the degenerate setcc-only case.
11109 rtx tmp;
11110 int nops;
11112 out = emit_store_flag (out, code, ix86_compare_op0,
11113 ix86_compare_op1, VOIDmode, 0, 1);
11115 nops = 0;
11116 /* On x86_64 the lea instruction operates on Pmode, so we need
11117 to get the arithmetic done in the proper mode to match. */
11118 if (diff == 1)
11119 tmp = copy_rtx (out);
11120 else
11122 rtx out1;
11123 out1 = copy_rtx (out);
11124 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11125 nops++;
11126 if (diff & 1)
11128 tmp = gen_rtx_PLUS (mode, tmp, out1);
11129 nops++;
11132 if (cf != 0)
11134 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11135 nops++;
11137 if (!rtx_equal_p (tmp, out))
11139 if (nops == 1)
11140 out = force_operand (tmp, copy_rtx (out));
11141 else
11142 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11144 if (!rtx_equal_p (out, operands[0]))
11145 emit_move_insn (operands[0], copy_rtx (out));
11147 return 1; /* DONE */
11151 * General case: Jumpful:
11152 * xorl dest,dest cmpl op1, op2
11153 * cmpl op1, op2 movl ct, dest
11154 * setcc dest jcc 1f
11155 * decl dest movl cf, dest
11156 * andl (cf-ct),dest 1:
11157 * addl ct,dest
11159 * Size 20. Size 14.
11161 * This is reasonably steep, but branch mispredict costs are
11162 * high on modern cpus, so consider failing only if optimizing
11163 * for space.
11166 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11167 && BRANCH_COST >= 2)
11169 if (cf == 0)
11171 cf = ct;
11172 ct = 0;
11173 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11174 /* We may be reversing an unordered compare to a normal compare,
11175 which is not valid in general (we may convert a non-trapping
11176 condition to a trapping one); however, on i386 we currently
11177 emit all comparisons unordered. */
11178 code = reverse_condition_maybe_unordered (code);
11179 else
11181 code = reverse_condition (code);
11182 if (compare_code != UNKNOWN)
11183 compare_code = reverse_condition (compare_code);
11187 if (compare_code != UNKNOWN)
11189 /* notl op1 (if needed)
11190 sarl $31, op1
11191 andl (cf-ct), op1
11192 addl ct, op1
11194 For x < 0 (resp. x <= -1) there will be no notl,
11195 so if possible swap the constants to get rid of the
11196 complement.
11197 True/false will be -1/0 while code below (store flag
11198 followed by decrement) is 0/-1, so the constants need
11199 to be exchanged once more. */
11201 if (compare_code == GE || !cf)
11203 code = reverse_condition (code);
11204 compare_code = LT;
11206 else
11208 HOST_WIDE_INT tmp = cf;
11209 cf = ct;
11210 ct = tmp;
11213 out = emit_store_flag (out, code, ix86_compare_op0,
11214 ix86_compare_op1, VOIDmode, 0, -1);
11216 else
11218 out = emit_store_flag (out, code, ix86_compare_op0,
11219 ix86_compare_op1, VOIDmode, 0, 1);
11221 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11222 copy_rtx (out), 1, OPTAB_DIRECT);
11225 out = expand_simple_binop (mode, AND, copy_rtx (out),
11226 gen_int_mode (cf - ct, mode),
11227 copy_rtx (out), 1, OPTAB_DIRECT);
11228 if (ct)
11229 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11230 copy_rtx (out), 1, OPTAB_DIRECT);
11231 if (!rtx_equal_p (out, operands[0]))
11232 emit_move_insn (operands[0], copy_rtx (out));
11234 return 1; /* DONE */
11238 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11240 /* Try a few things more with specific constants and a variable. */
11242 optab op;
11243 rtx var, orig_out, out, tmp;
11245 if (BRANCH_COST <= 2)
11246 return 0; /* FAIL */
11248 /* If one of the two operands is an interesting constant, load a
11249 constant with the above and mask it in with a logical operation. */
11251 if (GET_CODE (operands[2]) == CONST_INT)
11253 var = operands[3];
11254 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11255 operands[3] = constm1_rtx, op = and_optab;
11256 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11257 operands[3] = const0_rtx, op = ior_optab;
11258 else
11259 return 0; /* FAIL */
11261 else if (GET_CODE (operands[3]) == CONST_INT)
11263 var = operands[2];
11264 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11265 operands[2] = constm1_rtx, op = and_optab;
11266 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11267 operands[2] = const0_rtx, op = ior_optab;
11268 else
11269 return 0; /* FAIL */
11271 else
11272 return 0; /* FAIL */
11274 orig_out = operands[0];
11275 tmp = gen_reg_rtx (mode);
11276 operands[0] = tmp;
11278 /* Recurse to get the constant loaded. */
11279 if (ix86_expand_int_movcc (operands) == 0)
11280 return 0; /* FAIL */
11282 /* Mask in the interesting variable. */
11283 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11284 OPTAB_WIDEN);
11285 if (!rtx_equal_p (out, orig_out))
11286 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11288 return 1; /* DONE */
11292 * For comparison with above,
11294 * movl cf,dest
11295 * movl ct,tmp
11296 * cmpl op1,op2
11297 * cmovcc tmp,dest
11299 * Size 15.
11302 if (! nonimmediate_operand (operands[2], mode))
11303 operands[2] = force_reg (mode, operands[2]);
11304 if (! nonimmediate_operand (operands[3], mode))
11305 operands[3] = force_reg (mode, operands[3]);
11307 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11309 rtx tmp = gen_reg_rtx (mode);
11310 emit_move_insn (tmp, operands[3]);
11311 operands[3] = tmp;
11313 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11315 rtx tmp = gen_reg_rtx (mode);
11316 emit_move_insn (tmp, operands[2]);
11317 operands[2] = tmp;
11320 if (! register_operand (operands[2], VOIDmode)
11321 && (mode == QImode
11322 || ! register_operand (operands[3], VOIDmode)))
11323 operands[2] = force_reg (mode, operands[2]);
11325 if (mode == QImode
11326 && ! register_operand (operands[3], VOIDmode))
11327 operands[3] = force_reg (mode, operands[3]);
11329 emit_insn (compare_seq);
11330 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11331 gen_rtx_IF_THEN_ELSE (mode,
11332 compare_op, operands[2],
11333 operands[3])));
11334 if (bypass_test)
11335 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11336 gen_rtx_IF_THEN_ELSE (mode,
11337 bypass_test,
11338 copy_rtx (operands[3]),
11339 copy_rtx (operands[0]))));
11340 if (second_test)
11341 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11342 gen_rtx_IF_THEN_ELSE (mode,
11343 second_test,
11344 copy_rtx (operands[2]),
11345 copy_rtx (operands[0]))));
11347 return 1; /* DONE */
11350 /* Swap, force into registers, or otherwise massage the two operands
11351 to an sse comparison with a mask result. Thus we differ a bit from
11352 ix86_prepare_fp_compare_args which expects to produce a flags result.
11354 The DEST operand exists to help determine whether to commute commutative
11355 operators. The POP0/POP1 operands are updated in place. The new
11356 comparison code is returned, or UNKNOWN if not implementable. */
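/* For example, GT and GE have no direct cmpss/cmpps predicate, so "a > b"
is rewritten below as "b < a" by swapping the operands, while LTGT and UNEQ
are simply rejected. */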
11358 static enum rtx_code
11359 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11360 rtx *pop0, rtx *pop1)
11362 rtx tmp;
11364 switch (code)
11366 case LTGT:
11367 case UNEQ:
11368 /* We have no LTGT as an operator. We could implement it with
11369 NE & ORDERED, but this requires an extra temporary. It's
11370 not clear that it's worth it. */
11371 return UNKNOWN;
11373 case LT:
11374 case LE:
11375 case UNGT:
11376 case UNGE:
11377 /* These are supported directly. */
11378 break;
11380 case EQ:
11381 case NE:
11382 case UNORDERED:
11383 case ORDERED:
11384 /* For commutative operators, try to canonicalize the destination
11385 operand to be first in the comparison - this helps reload to
11386 avoid extra moves. */
11387 if (!dest || !rtx_equal_p (dest, *pop1))
11388 break;
11389 /* FALLTHRU */
11391 case GE:
11392 case GT:
11393 case UNLE:
11394 case UNLT:
11395 /* These are not supported directly. Swap the comparison operands
11396 to transform into something that is supported. */
11397 tmp = *pop0;
11398 *pop0 = *pop1;
11399 *pop1 = tmp;
11400 code = swap_condition (code);
11401 break;
11403 default:
11404 gcc_unreachable ();
11407 return code;
11410 /* Detect conditional moves that exactly match min/max operational
11411 semantics. Note that this is IEEE safe, as long as we don't
11412 interchange the operands.
11414 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11415 and TRUE if the operation is successful and instructions are emitted. */
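/* E.g. "x < y ? x : y" maps to SMIN (minss/minsd) when -ffinite-math-only
and -funsafe-math-optimizations are in effect; otherwise the IEEE-safe
UNSPEC form below is used so that the operand order, and with it the NaN
and signed-zero behaviour, is preserved. */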
11417 static bool
11418 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11419 rtx cmp_op1, rtx if_true, rtx if_false)
11421 enum machine_mode mode;
11422 bool is_min;
11423 rtx tmp;
11425 if (code == LT)
11427 else if (code == UNGE)
11429 tmp = if_true;
11430 if_true = if_false;
11431 if_false = tmp;
11433 else
11434 return false;
11436 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11437 is_min = true;
11438 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11439 is_min = false;
11440 else
11441 return false;
11443 mode = GET_MODE (dest);
11445 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11446 but MODE may be a vector mode and thus not appropriate. */
11447 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11449 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11450 rtvec v;
11452 if_true = force_reg (mode, if_true);
11453 v = gen_rtvec (2, if_true, if_false);
11454 tmp = gen_rtx_UNSPEC (mode, v, u);
11456 else
11458 code = is_min ? SMIN : SMAX;
11459 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11462 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11463 return true;
11466 /* Expand an sse vector comparison. Return the register with the result. */
11468 static rtx
11469 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11470 rtx op_true, rtx op_false)
11472 enum machine_mode mode = GET_MODE (dest);
11473 rtx x;
11475 cmp_op0 = force_reg (mode, cmp_op0);
11476 if (!nonimmediate_operand (cmp_op1, mode))
11477 cmp_op1 = force_reg (mode, cmp_op1);
11479 if (optimize
11480 || reg_overlap_mentioned_p (dest, op_true)
11481 || reg_overlap_mentioned_p (dest, op_false))
11482 dest = gen_reg_rtx (mode);
11484 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11485 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11487 return dest;
11490 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11491 operations. This is used for both scalar and vector conditional moves. */
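/* In the general case this computes, with CMP an all-ones/all-zeros mask
from ix86_expand_sse_cmp,
dest = (cmp & op_true) | (~cmp & op_false)
the two special cases below drop one arm when it is the zero vector. */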
11493 static void
11494 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11496 enum machine_mode mode = GET_MODE (dest);
11497 rtx t2, t3, x;
11499 if (op_false == CONST0_RTX (mode))
11501 op_true = force_reg (mode, op_true);
11502 x = gen_rtx_AND (mode, cmp, op_true);
11503 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11505 else if (op_true == CONST0_RTX (mode))
11507 op_false = force_reg (mode, op_false);
11508 x = gen_rtx_NOT (mode, cmp);
11509 x = gen_rtx_AND (mode, x, op_false);
11510 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11512 else
11514 op_true = force_reg (mode, op_true);
11515 op_false = force_reg (mode, op_false);
11517 t2 = gen_reg_rtx (mode);
11518 if (optimize)
11519 t3 = gen_reg_rtx (mode);
11520 else
11521 t3 = dest;
11523 x = gen_rtx_AND (mode, op_true, cmp);
11524 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11526 x = gen_rtx_NOT (mode, cmp);
11527 x = gen_rtx_AND (mode, x, op_false);
11528 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11530 x = gen_rtx_IOR (mode, t3, t2);
11531 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11535 /* Expand a floating-point conditional move. Return true if successful. */
11538 ix86_expand_fp_movcc (rtx operands[])
11540 enum machine_mode mode = GET_MODE (operands[0]);
11541 enum rtx_code code = GET_CODE (operands[1]);
11542 rtx tmp, compare_op, second_test, bypass_test;
11544 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11546 enum machine_mode cmode;
11548 /* Since we have no cmove for sse registers, don't force bad register
11549 allocation just to gain access to it. Deny the movcc when the
11550 comparison mode doesn't match the move mode. */
11551 cmode = GET_MODE (ix86_compare_op0);
11552 if (cmode == VOIDmode)
11553 cmode = GET_MODE (ix86_compare_op1);
11554 if (cmode != mode)
11555 return 0;
11557 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11558 &ix86_compare_op0,
11559 &ix86_compare_op1);
11560 if (code == UNKNOWN)
11561 return 0;
11563 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11564 ix86_compare_op1, operands[2],
11565 operands[3]))
11566 return 1;
11568 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11569 ix86_compare_op1, operands[2], operands[3]);
11570 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11571 return 1;
11574 /* The floating point conditional move instructions don't directly
11575 support conditions resulting from a signed integer comparison. */
11577 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11582 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11584 gcc_assert (!second_test && !bypass_test);
11585 tmp = gen_reg_rtx (QImode);
11586 ix86_expand_setcc (code, tmp);
11587 code = NE;
11588 ix86_compare_op0 = tmp;
11589 ix86_compare_op1 = const0_rtx;
11590 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11592 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11594 tmp = gen_reg_rtx (mode);
11595 emit_move_insn (tmp, operands[3]);
11596 operands[3] = tmp;
11598 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11600 tmp = gen_reg_rtx (mode);
11601 emit_move_insn (tmp, operands[2]);
11602 operands[2] = tmp;
11605 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11606 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11607 operands[2], operands[3])));
11608 if (bypass_test)
11609 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11610 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11611 operands[3], operands[0])));
11612 if (second_test)
11613 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11614 gen_rtx_IF_THEN_ELSE (mode, second_test,
11615 operands[2], operands[0])));
11617 return 1;
11620 /* Expand a floating-point vector conditional move; a vcond operation
11621 rather than a movcc operation. */
11623 bool
11624 ix86_expand_fp_vcond (rtx operands[])
11626 enum rtx_code code = GET_CODE (operands[3]);
11627 rtx cmp;
11629 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11630 &operands[4], &operands[5]);
11631 if (code == UNKNOWN)
11632 return false;
11634 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11635 operands[5], operands[1], operands[2]))
11636 return true;
11638 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11639 operands[1], operands[2]);
11640 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11641 return true;
11644 /* Expand a signed integral vector conditional move. */
11646 bool
11647 ix86_expand_int_vcond (rtx operands[])
11649 enum machine_mode mode = GET_MODE (operands[0]);
11650 enum rtx_code code = GET_CODE (operands[3]);
11651 bool negate = false;
11652 rtx x, cop0, cop1;
11654 cop0 = operands[4];
11655 cop1 = operands[5];
11657 /* Canonicalize the comparison to EQ, GT, GTU. */
11658 switch (code)
11660 case EQ:
11661 case GT:
11662 case GTU:
11663 break;
11665 case NE:
11666 case LE:
11667 case LEU:
11668 code = reverse_condition (code);
11669 negate = true;
11670 break;
11672 case GE:
11673 case GEU:
11674 code = reverse_condition (code);
11675 negate = true;
11676 /* FALLTHRU */
11678 case LT:
11679 case LTU:
11680 code = swap_condition (code);
11681 x = cop0, cop0 = cop1, cop1 = x;
11682 break;
11684 default:
11685 gcc_unreachable ();
11688 /* Unsigned parallel compare is not supported by the hardware. Play some
11689 tricks to turn this into a signed comparison against 0. */
11690 if (code == GTU)
11692 cop0 = force_reg (mode, cop0);
11694 switch (mode)
11696 case V4SImode:
11698 rtx t1, t2, mask;
11700 /* Perform a parallel modulo subtraction. */
11701 t1 = gen_reg_rtx (mode);
11702 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11704 /* Extract the original sign bit of op0. */
11705 mask = GEN_INT (-0x80000000);
11706 mask = gen_rtx_CONST_VECTOR (mode,
11707 gen_rtvec (4, mask, mask, mask, mask));
11708 mask = force_reg (mode, mask);
11709 t2 = gen_reg_rtx (mode);
11710 emit_insn (gen_andv4si3 (t2, cop0, mask));
11712 /* XOR it back into the result of the subtraction. This results
11713 in the sign bit set iff we saw unsigned underflow. */
11714 x = gen_reg_rtx (mode);
11715 emit_insn (gen_xorv4si3 (x, t1, t2));
11717 code = GT;
11719 break;
11721 case V16QImode:
11722 case V8HImode:
11723 /* Perform a parallel unsigned saturating subtraction. */
11724 x = gen_reg_rtx (mode);
11725 emit_insn (gen_rtx_SET (VOIDmode, x,
11726 gen_rtx_US_MINUS (mode, cop0, cop1)));
11728 code = EQ;
11729 negate = !negate;
11730 break;
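/* For illustration: with unsigned elements, a >u b exactly when the
   saturating difference a -us b is nonzero (e.g. 200 -us 100 = 100,
   while 100 -us 200 = 0), so comparing that difference for equality
   with zero and toggling NEGATE realizes the GTU test.  */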
11732 default:
11733 gcc_unreachable ();
11736 cop0 = x;
11737 cop1 = CONST0_RTX (mode);
11740 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11741 operands[1+negate], operands[2-negate]);
11743 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11744 operands[2-negate]);
11745 return true;
11748 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11749 true if we should do zero extension, else sign extension. HIGH_P is
11750 true if we want the N/2 high elements, else the low elements. */
11752 void
11753 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11755 enum machine_mode imode = GET_MODE (operands[1]);
11756 rtx (*unpack)(rtx, rtx, rtx);
11757 rtx se, dest;
11759 switch (imode)
11761 case V16QImode:
11762 if (high_p)
11763 unpack = gen_vec_interleave_highv16qi;
11764 else
11765 unpack = gen_vec_interleave_lowv16qi;
11766 break;
11767 case V8HImode:
11768 if (high_p)
11769 unpack = gen_vec_interleave_highv8hi;
11770 else
11771 unpack = gen_vec_interleave_lowv8hi;
11772 break;
11773 case V4SImode:
11774 if (high_p)
11775 unpack = gen_vec_interleave_highv4si;
11776 else
11777 unpack = gen_vec_interleave_lowv4si;
11778 break;
11779 default:
11780 gcc_unreachable ();
11783 dest = gen_lowpart (imode, operands[0]);
11785 if (unsigned_p)
11786 se = force_reg (imode, CONST0_RTX (imode));
11787 else
11788 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11789 operands[1], pc_rtx, pc_rtx);
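/* For the signed case the SE vector computed above is (0 > x) per
   element, i.e. all-ones exactly for negative elements, so interleaving
   the source with it yields sign-extended double-width elements; for
   the unsigned case interleaving with the zero vector zero-extends.  */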
11791 emit_insn (unpack (dest, operands[1], se));
11794 /* Expand conditional increment or decrement using adc/sbb instructions.
11795 The default case using setcc followed by the conditional move can be
11796 done by generic code. */
11798 ix86_expand_int_addcc (rtx operands[])
11800 enum rtx_code code = GET_CODE (operands[1]);
11801 rtx compare_op;
11802 rtx val = const0_rtx;
11803 bool fpcmp = false;
11804 enum machine_mode mode = GET_MODE (operands[0]);
11806 if (operands[3] != const1_rtx
11807 && operands[3] != constm1_rtx)
11808 return 0;
11809 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11810 ix86_compare_op1, &compare_op))
11811 return 0;
11812 code = GET_CODE (compare_op);
11814 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11815 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11817 fpcmp = true;
11818 code = ix86_fp_compare_code_to_integer (code);
11821 if (code != LTU)
11823 val = constm1_rtx;
11824 if (fpcmp)
11825 PUT_CODE (compare_op,
11826 reverse_condition_maybe_unordered
11827 (GET_CODE (compare_op)));
11828 else
11829 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11831 PUT_MODE (compare_op, mode);
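/* A rough example of the idea (a sketch, not the exact emitted RTL):
   with a and b unsigned,

       r = (a < b) ? r + 1 : r;   =>   cmpl %ebx, %eax ; adcl $0, %ecx

   since the compare leaves CF set exactly when a < b.  The reversed
   conditions and the decrement case are handled via the -1 addend
   chosen above and by picking sbb instead of adc below.  */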
11833 /* Construct either adc or sbb insn. */
11834 if ((code == LTU) == (operands[3] == constm1_rtx))
11836 switch (GET_MODE (operands[0]))
11838 case QImode:
11839 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11840 break;
11841 case HImode:
11842 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11843 break;
11844 case SImode:
11845 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11846 break;
11847 case DImode:
11848 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11849 break;
11850 default:
11851 gcc_unreachable ();
11854 else
11856 switch (GET_MODE (operands[0]))
11858 case QImode:
11859 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11860 break;
11861 case HImode:
11862 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11863 break;
11864 case SImode:
11865 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11866 break;
11867 case DImode:
11868 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11869 break;
11870 default:
11871 gcc_unreachable ();
11874 return 1; /* DONE */
11878 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11879 works for floating point parameters and non-offsettable memories.
11880 For pushes, it returns just stack offsets; the values will be saved
11881 in the right order. Maximally three parts are generated. */
11883 static int
11884 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11886 int size;
11888 if (!TARGET_64BIT)
11889 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11890 else
11891 size = (GET_MODE_SIZE (mode) + 4) / 8;
11893 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11894 gcc_assert (size >= 2 && size <= 3);
11896 /* Optimize constant pool reference to immediates. This is used by fp
11897 moves, which force all constants to memory to allow combining. */
11898 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11900 rtx tmp = maybe_get_pool_constant (operand);
11901 if (tmp)
11902 operand = tmp;
11905 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11907 /* The only non-offsettable memories we handle are pushes. */
11908 int ok = push_operand (operand, VOIDmode);
11910 gcc_assert (ok);
11912 operand = copy_rtx (operand);
11913 PUT_MODE (operand, Pmode);
11914 parts[0] = parts[1] = parts[2] = operand;
11915 return size;
11918 if (GET_CODE (operand) == CONST_VECTOR)
11920 enum machine_mode imode = int_mode_for_mode (mode);
11921 /* Caution: if we looked through a constant pool memory above,
11922 the operand may actually have a different mode now. That's
11923 ok, since we want to pun this all the way back to an integer. */
11924 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11925 gcc_assert (operand != NULL);
11926 mode = imode;
11929 if (!TARGET_64BIT)
11931 if (mode == DImode)
11932 split_di (&operand, 1, &parts[0], &parts[1]);
11933 else
11935 if (REG_P (operand))
11937 gcc_assert (reload_completed);
11938 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11939 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11940 if (size == 3)
11941 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11943 else if (offsettable_memref_p (operand))
11945 operand = adjust_address (operand, SImode, 0);
11946 parts[0] = operand;
11947 parts[1] = adjust_address (operand, SImode, 4);
11948 if (size == 3)
11949 parts[2] = adjust_address (operand, SImode, 8);
11951 else if (GET_CODE (operand) == CONST_DOUBLE)
11953 REAL_VALUE_TYPE r;
11954 long l[4];
11956 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11957 switch (mode)
11959 case XFmode:
11960 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11961 parts[2] = gen_int_mode (l[2], SImode);
11962 break;
11963 case DFmode:
11964 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11965 break;
11966 default:
11967 gcc_unreachable ();
11969 parts[1] = gen_int_mode (l[1], SImode);
11970 parts[0] = gen_int_mode (l[0], SImode);
11972 else
11973 gcc_unreachable ();
11976 else
11978 if (mode == TImode)
11979 split_ti (&operand, 1, &parts[0], &parts[1]);
11980 if (mode == XFmode || mode == TFmode)
11982 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11983 if (REG_P (operand))
11985 gcc_assert (reload_completed);
11986 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11987 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11989 else if (offsettable_memref_p (operand))
11991 operand = adjust_address (operand, DImode, 0);
11992 parts[0] = operand;
11993 parts[1] = adjust_address (operand, upper_mode, 8);
11995 else if (GET_CODE (operand) == CONST_DOUBLE)
11997 REAL_VALUE_TYPE r;
11998 long l[4];
12000 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12001 real_to_target (l, &r, mode);
12003 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12004 if (HOST_BITS_PER_WIDE_INT >= 64)
12005 parts[0]
12006 = gen_int_mode
12007 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12008 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12009 DImode);
12010 else
12011 parts[0] = immed_double_const (l[0], l[1], DImode);
12013 if (upper_mode == SImode)
12014 parts[1] = gen_int_mode (l[2], SImode);
12015 else if (HOST_BITS_PER_WIDE_INT >= 64)
12016 parts[1]
12017 = gen_int_mode
12018 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12019 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12020 DImode);
12021 else
12022 parts[1] = immed_double_const (l[2], l[3], DImode);
12024 else
12025 gcc_unreachable ();
12029 return size;
12032 /* Emit insns to perform a move or push of DI, DF, and XF values.
12033 All required insns are emitted here. Operands 2-4 receive the
12034 destination parts in the correct order; operands 5-7 receive the
12035 corresponding source values. */
12037 void
12038 ix86_split_long_move (rtx operands[])
12040 rtx part[2][3];
12041 int nparts;
12042 int push = 0;
12043 int collisions = 0;
12044 enum machine_mode mode = GET_MODE (operands[0]);
12046 /* The DFmode expanders may ask us to move a double.
12047 For a 64-bit target this is a single move. By hiding that fact
12048 here we simplify the i386.md splitters. */
12049 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12051 /* Optimize constant pool reference to immediates. This is used by
12052 fp moves, which force all constants to memory to allow combining. */
12054 if (GET_CODE (operands[1]) == MEM
12055 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12056 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12057 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12058 if (push_operand (operands[0], VOIDmode))
12060 operands[0] = copy_rtx (operands[0]);
12061 PUT_MODE (operands[0], Pmode);
12063 else
12064 operands[0] = gen_lowpart (DImode, operands[0]);
12065 operands[1] = gen_lowpart (DImode, operands[1]);
12066 emit_move_insn (operands[0], operands[1]);
12067 return;
12070 /* The only non-offsettable memory we handle is push. */
12071 if (push_operand (operands[0], VOIDmode))
12072 push = 1;
12073 else
12074 gcc_assert (GET_CODE (operands[0]) != MEM
12075 || offsettable_memref_p (operands[0]));
12077 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12078 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12080 /* When emitting a push, take care of source operands that live on the stack. */
12081 if (push && GET_CODE (operands[1]) == MEM
12082 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12084 if (nparts == 3)
12085 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12086 XEXP (part[1][2], 0));
12087 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12088 XEXP (part[1][1], 0));
12091 /* We need to do the copy in the right order in case an address register
12092 of the source overlaps the destination. */
12093 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12095 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12096 collisions++;
12097 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12098 collisions++;
12099 if (nparts == 3
12100 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12101 collisions++;
12103 /* Collision in the middle part can be handled by reordering. */
12104 if (collisions == 1 && nparts == 3
12105 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12107 rtx tmp;
12108 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12109 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12112 /* If there are more collisions, we can't handle them by reordering.
12113 Do an lea to the last part and use only one colliding move. */
12114 else if (collisions > 1)
12116 rtx base;
12118 collisions = 1;
12120 base = part[0][nparts - 1];
12122 /* Handle the case when the last part isn't valid for lea.
12123 Happens in 64-bit mode storing the 12-byte XFmode. */
12124 if (GET_MODE (base) != Pmode)
12125 base = gen_rtx_REG (Pmode, REGNO (base));
12127 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12128 part[1][0] = replace_equiv_address (part[1][0], base);
12129 part[1][1] = replace_equiv_address (part[1][1],
12130 plus_constant (base, UNITS_PER_WORD));
12131 if (nparts == 3)
12132 part[1][2] = replace_equiv_address (part[1][2],
12133 plus_constant (base, 8));
12137 if (push)
12139 if (!TARGET_64BIT)
12141 if (nparts == 3)
12143 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12144 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12145 emit_move_insn (part[0][2], part[1][2]);
12148 else
12150 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12151 a register, it is OK - we will just use the larger counterpart. We also
12152 retype memory - these references come from an attempt to avoid a REX prefix
12153 on moving the second half of a TFmode value. */
12154 if (GET_MODE (part[1][1]) == SImode)
12156 switch (GET_CODE (part[1][1]))
12158 case MEM:
12159 part[1][1] = adjust_address (part[1][1], DImode, 0);
12160 break;
12162 case REG:
12163 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12164 break;
12166 default:
12167 gcc_unreachable ();
12170 if (GET_MODE (part[1][0]) == SImode)
12171 part[1][0] = part[1][1];
12174 emit_move_insn (part[0][1], part[1][1]);
12175 emit_move_insn (part[0][0], part[1][0]);
12176 return;
12179 /* Choose correct order to not overwrite the source before it is copied. */
12180 if ((REG_P (part[0][0])
12181 && REG_P (part[1][1])
12182 && (REGNO (part[0][0]) == REGNO (part[1][1])
12183 || (nparts == 3
12184 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12185 || (collisions > 0
12186 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12188 if (nparts == 3)
12190 operands[2] = part[0][2];
12191 operands[3] = part[0][1];
12192 operands[4] = part[0][0];
12193 operands[5] = part[1][2];
12194 operands[6] = part[1][1];
12195 operands[7] = part[1][0];
12197 else
12199 operands[2] = part[0][1];
12200 operands[3] = part[0][0];
12201 operands[5] = part[1][1];
12202 operands[6] = part[1][0];
12205 else
12207 if (nparts == 3)
12209 operands[2] = part[0][0];
12210 operands[3] = part[0][1];
12211 operands[4] = part[0][2];
12212 operands[5] = part[1][0];
12213 operands[6] = part[1][1];
12214 operands[7] = part[1][2];
12216 else
12218 operands[2] = part[0][0];
12219 operands[3] = part[0][1];
12220 operands[5] = part[1][0];
12221 operands[6] = part[1][1];
12225 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12226 if (optimize_size)
12228 if (GET_CODE (operands[5]) == CONST_INT
12229 && operands[5] != const0_rtx
12230 && REG_P (operands[2]))
12232 if (GET_CODE (operands[6]) == CONST_INT
12233 && INTVAL (operands[6]) == INTVAL (operands[5]))
12234 operands[6] = operands[2];
12236 if (nparts == 3
12237 && GET_CODE (operands[7]) == CONST_INT
12238 && INTVAL (operands[7]) == INTVAL (operands[5]))
12239 operands[7] = operands[2];
12242 if (nparts == 3
12243 && GET_CODE (operands[6]) == CONST_INT
12244 && operands[6] != const0_rtx
12245 && REG_P (operands[3])
12246 && GET_CODE (operands[7]) == CONST_INT
12247 && INTVAL (operands[7]) == INTVAL (operands[6]))
12248 operands[7] = operands[3];
12251 emit_move_insn (operands[2], operands[5]);
12252 emit_move_insn (operands[3], operands[6]);
12253 if (nparts == 3)
12254 emit_move_insn (operands[4], operands[7]);
12256 return;
12259 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12260 left shift by a constant, either using a single shift or
12261 a sequence of add instructions. */
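/* For example, when not optimizing for size and the active cost table
   says two adds cost no more than one constant shift, a left shift by 2
   comes out as two "add reg,reg" instructions; otherwise (or with -Os,
   except for a shift by 1) a single shift is used.  The break-even
   point depends on the ix86_cost entries, so the numbers here are only
   illustrative.  */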
12263 static void
12264 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12266 if (count == 1)
12268 emit_insn ((mode == DImode
12269 ? gen_addsi3
12270 : gen_adddi3) (operand, operand, operand));
12272 else if (!optimize_size
12273 && count * ix86_cost->add <= ix86_cost->shift_const)
12275 int i;
12276 for (i=0; i<count; i++)
12278 emit_insn ((mode == DImode
12279 ? gen_addsi3
12280 : gen_adddi3) (operand, operand, operand));
12283 else
12284 emit_insn ((mode == DImode
12285 ? gen_ashlsi3
12286 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12289 void
12290 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12292 rtx low[2], high[2];
12293 int count;
12294 const int single_width = mode == DImode ? 32 : 64;
12296 if (GET_CODE (operands[2]) == CONST_INT)
12298 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12299 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12301 if (count >= single_width)
12303 emit_move_insn (high[0], low[1]);
12304 emit_move_insn (low[0], const0_rtx);
12306 if (count > single_width)
12307 ix86_expand_ashl_const (high[0], count - single_width, mode);
12309 else
12311 if (!rtx_equal_p (operands[0], operands[1]))
12312 emit_move_insn (operands[0], operands[1]);
12313 emit_insn ((mode == DImode
12314 ? gen_x86_shld_1
12315 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12316 ix86_expand_ashl_const (low[0], count, mode);
12318 return;
12321 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12323 if (operands[1] == const1_rtx)
12325 /* Assuming we've chosen QImode-capable registers, 1 << N
12326 can be done with two 32/64-bit shifts, no branches, and no cmoves. */
12327 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12329 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12331 ix86_expand_clear (low[0]);
12332 ix86_expand_clear (high[0]);
12333 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12335 d = gen_lowpart (QImode, low[0]);
12336 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12337 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12338 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12340 d = gen_lowpart (QImode, high[0]);
12341 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12342 s = gen_rtx_NE (QImode, flags, const0_rtx);
12343 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12346 /* Otherwise, we can get the same results by manually performing
12347 a bit extract operation on bit 5/6, and then performing the two
12348 shifts. The two methods of getting 0/1 into low/high are exactly
12349 the same size. Avoiding the shift in the bit extract case helps
12350 pentium4 a bit; no one else seems to care much either way. */
12351 else
12353 rtx x;
12355 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12356 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12357 else
12358 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12359 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12361 emit_insn ((mode == DImode
12362 ? gen_lshrsi3
12363 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12364 emit_insn ((mode == DImode
12365 ? gen_andsi3
12366 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12367 emit_move_insn (low[0], high[0]);
12368 emit_insn ((mode == DImode
12369 ? gen_xorsi3
12370 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
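/* Worked example of the scheme above for DImode on a 32-bit target:
   for 1 << 37 the code first produces low = 0, high = 1 (bit 5 of the
   count is set), and the two shifts emitted below then shift both
   halves by 37 & 31 = 5, giving high:low = 32:0, which is 1 << 37.  */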
12373 emit_insn ((mode == DImode
12374 ? gen_ashlsi3
12375 : gen_ashldi3) (low[0], low[0], operands[2]));
12376 emit_insn ((mode == DImode
12377 ? gen_ashlsi3
12378 : gen_ashldi3) (high[0], high[0], operands[2]));
12379 return;
12382 if (operands[1] == constm1_rtx)
12384 /* For -1 << N, we can avoid the shld instruction, because we
12385 know that we're shifting 0...31/63 ones into a -1. */
12386 emit_move_insn (low[0], constm1_rtx);
12387 if (optimize_size)
12388 emit_move_insn (high[0], low[0]);
12389 else
12390 emit_move_insn (high[0], constm1_rtx);
12392 else
12394 if (!rtx_equal_p (operands[0], operands[1]))
12395 emit_move_insn (operands[0], operands[1]);
12397 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12398 emit_insn ((mode == DImode
12399 ? gen_x86_shld_1
12400 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12403 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12405 if (TARGET_CMOVE && scratch)
12407 ix86_expand_clear (scratch);
12408 emit_insn ((mode == DImode
12409 ? gen_x86_shift_adj_1
12410 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12412 else
12413 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12416 void
12417 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12419 rtx low[2], high[2];
12420 int count;
12421 const int single_width = mode == DImode ? 32 : 64;
12423 if (GET_CODE (operands[2]) == CONST_INT)
12425 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12426 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12428 if (count == single_width * 2 - 1)
12430 emit_move_insn (high[0], high[1]);
12431 emit_insn ((mode == DImode
12432 ? gen_ashrsi3
12433 : gen_ashrdi3) (high[0], high[0],
12434 GEN_INT (single_width - 1)));
12435 emit_move_insn (low[0], high[0]);
12438 else if (count >= single_width)
12440 emit_move_insn (low[0], high[1]);
12441 emit_move_insn (high[0], low[0]);
12442 emit_insn ((mode == DImode
12443 ? gen_ashrsi3
12444 : gen_ashrdi3) (high[0], high[0],
12445 GEN_INT (single_width - 1)));
12446 if (count > single_width)
12447 emit_insn ((mode == DImode
12448 ? gen_ashrsi3
12449 : gen_ashrdi3) (low[0], low[0],
12450 GEN_INT (count - single_width)));
12452 else
12454 if (!rtx_equal_p (operands[0], operands[1]))
12455 emit_move_insn (operands[0], operands[1]);
12456 emit_insn ((mode == DImode
12457 ? gen_x86_shrd_1
12458 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12459 emit_insn ((mode == DImode
12460 ? gen_ashrsi3
12461 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12464 else
12466 if (!rtx_equal_p (operands[0], operands[1]))
12467 emit_move_insn (operands[0], operands[1]);
12469 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12471 emit_insn ((mode == DImode
12472 ? gen_x86_shrd_1
12473 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12474 emit_insn ((mode == DImode
12475 ? gen_ashrsi3
12476 : gen_ashrdi3) (high[0], high[0], operands[2]));
12478 if (TARGET_CMOVE && scratch)
12480 emit_move_insn (scratch, high[0]);
12481 emit_insn ((mode == DImode
12482 ? gen_ashrsi3
12483 : gen_ashrdi3) (scratch, scratch,
12484 GEN_INT (single_width - 1)));
12485 emit_insn ((mode == DImode
12486 ? gen_x86_shift_adj_1
12487 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12488 scratch));
12490 else
12491 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12495 void
12496 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12498 rtx low[2], high[2];
12499 int count;
12500 const int single_width = mode == DImode ? 32 : 64;
12502 if (GET_CODE (operands[2]) == CONST_INT)
12504 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12505 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12507 if (count >= single_width)
12509 emit_move_insn (low[0], high[1]);
12510 ix86_expand_clear (high[0]);
12512 if (count > single_width)
12513 emit_insn ((mode == DImode
12514 ? gen_lshrsi3
12515 : gen_lshrdi3) (low[0], low[0],
12516 GEN_INT (count - single_width)));
12518 else
12520 if (!rtx_equal_p (operands[0], operands[1]))
12521 emit_move_insn (operands[0], operands[1]);
12522 emit_insn ((mode == DImode
12523 ? gen_x86_shrd_1
12524 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12525 emit_insn ((mode == DImode
12526 ? gen_lshrsi3
12527 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12530 else
12532 if (!rtx_equal_p (operands[0], operands[1]))
12533 emit_move_insn (operands[0], operands[1]);
12535 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12537 emit_insn ((mode == DImode
12538 ? gen_x86_shrd_1
12539 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12540 emit_insn ((mode == DImode
12541 ? gen_lshrsi3
12542 : gen_lshrdi3) (high[0], high[0], operands[2]));
12544 /* Heh. By reversing the arguments, we can reuse this pattern. */
12545 if (TARGET_CMOVE && scratch)
12547 ix86_expand_clear (scratch);
12548 emit_insn ((mode == DImode
12549 ? gen_x86_shift_adj_1
12550 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12551 scratch));
12553 else
12554 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12558 /* Helper function for the string operations below. Test whether the bits
12559 of VARIABLE selected by VALUE are zero; if so, the emitted code jumps to the returned label. */
12560 static rtx
12561 ix86_expand_aligntest (rtx variable, int value)
12563 rtx label = gen_label_rtx ();
12564 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12565 if (GET_MODE (variable) == DImode)
12566 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12567 else
12568 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12569 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12570 1, label);
12571 return label;
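/* Typical use, as in the expanders below: emit the test, emit the code
   handling the unaligned case, then place the returned label after it,
   e.g.

       label = ix86_expand_aligntest (destreg, 1);
       ... copy or store a single byte, adjust the counter ...
       emit_label (label);
       LABEL_NUSES (label) = 1;
*/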
12574 /* Decrease COUNTREG by VALUE. */
12575 static void
12576 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12578 if (GET_MODE (countreg) == DImode)
12579 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12580 else
12581 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12584 /* Zero extend possibly SImode EXP to Pmode register. */
12586 ix86_zero_extend_to_Pmode (rtx exp)
12588 rtx r;
12589 if (GET_MODE (exp) == VOIDmode)
12590 return force_reg (Pmode, exp);
12591 if (GET_MODE (exp) == Pmode)
12592 return copy_to_mode_reg (Pmode, exp);
12593 r = gen_reg_rtx (Pmode);
12594 emit_insn (gen_zero_extendsidi2 (r, exp));
12595 return r;
12598 /* Expand string move (memcpy) operation. Use i386 string operations when
12599 profitable. ix86_expand_clrmem contains similar code. */
12601 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12603 rtx srcreg, destreg, countreg, srcexp, destexp;
12604 enum machine_mode counter_mode;
12605 HOST_WIDE_INT align = 0;
12606 unsigned HOST_WIDE_INT count = 0;
12608 if (GET_CODE (align_exp) == CONST_INT)
12609 align = INTVAL (align_exp);
12611 /* Can't use any of this if the user has appropriated esi or edi. */
12612 if (global_regs[4] || global_regs[5])
12613 return 0;
12615 /* This simple hack avoids all inlining code and simplifies code below. */
12616 if (!TARGET_ALIGN_STRINGOPS)
12617 align = 64;
12619 if (GET_CODE (count_exp) == CONST_INT)
12621 count = INTVAL (count_exp);
12622 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12623 return 0;
12626 /* Figure out proper mode for counter. For 32bits it is always SImode,
12627 for 64bits use SImode when possible, otherwise DImode.
12628 Set count to number of bytes copied when known at compile time. */
12629 if (!TARGET_64BIT
12630 || GET_MODE (count_exp) == SImode
12631 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12632 counter_mode = SImode;
12633 else
12634 counter_mode = DImode;
12636 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12638 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12639 if (destreg != XEXP (dst, 0))
12640 dst = replace_equiv_address_nv (dst, destreg);
12641 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12642 if (srcreg != XEXP (src, 0))
12643 src = replace_equiv_address_nv (src, srcreg);
12645 /* When optimizing for size, emit a simple rep ; movsb instruction for
12646 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12647 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12648 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12649 count / 4 + (count & 3) bytes; the other sequence is either 4 or 7 bytes,
12650 depending on whether the upper 24 (resp. 56) bits of %ecx are known
12651 to be zero, which we cannot tell here. The rep; movsb sequence causes higher
12652 register pressure though, so take that into account. */
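/* A worked size comparison for the comment above: for count = 11 the
   unrolled movsl;movsl;movsw;movsb form is 11/4 + (11 & 3) = 2 + 3 = 5
   bytes, while mov $11, %ecx; rep; movsb is 5 + 2 = 7 bytes (or
   2 + 2 = 4 bytes if loading only %cl were known to be safe).  */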
12654 if ((!optimize || optimize_size)
12655 && (count == 0
12656 || ((count & 0x03)
12657 && (!optimize_size
12658 || count > 5 * 4
12659 || (count & 3) + count / 4 > 6))))
12661 emit_insn (gen_cld ());
12662 countreg = ix86_zero_extend_to_Pmode (count_exp);
12663 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12664 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12665 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12666 destexp, srcexp));
12669 /* For constant aligned (or small unaligned) copies use rep movsl
12670 followed by code copying the rest. For PentiumPro ensure 8 byte
12671 alignment to allow rep movsl acceleration. */
12673 else if (count != 0
12674 && (align >= 8
12675 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12676 || optimize_size || count < (unsigned int) 64))
12678 unsigned HOST_WIDE_INT offset = 0;
12679 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12680 rtx srcmem, dstmem;
12682 emit_insn (gen_cld ());
12683 if (count & ~(size - 1))
12685 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12687 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12689 while (offset < (count & ~(size - 1)))
12691 srcmem = adjust_automodify_address_nv (src, movs_mode,
12692 srcreg, offset);
12693 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12694 destreg, offset);
12695 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12696 offset += size;
12699 else
12701 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12702 & (TARGET_64BIT ? -1 : 0x3fffffff));
12703 countreg = copy_to_mode_reg (counter_mode, countreg);
12704 countreg = ix86_zero_extend_to_Pmode (countreg);
12706 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12707 GEN_INT (size == 4 ? 2 : 3));
12708 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12709 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12711 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12712 countreg, destexp, srcexp));
12713 offset = count & ~(size - 1);
12716 if (size == 8 && (count & 0x04))
12718 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12719 offset);
12720 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12721 offset);
12722 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12723 offset += 4;
12725 if (count & 0x02)
12727 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12728 offset);
12729 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12730 offset);
12731 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12732 offset += 2;
12734 if (count & 0x01)
12736 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12737 offset);
12738 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12739 offset);
12740 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12743 /* The generic code based on the glibc implementation:
12744 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12745 allowing accelerated copying there)
12746 - copy the data using rep movsl
12747 - copy the rest. */
12748 else
12750 rtx countreg2;
12751 rtx label = NULL;
12752 rtx srcmem, dstmem;
12753 int desired_alignment = (TARGET_PENTIUMPRO
12754 && (count == 0 || count >= (unsigned int) 260)
12755 ? 8 : UNITS_PER_WORD);
12756 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12757 dst = change_address (dst, BLKmode, destreg);
12758 src = change_address (src, BLKmode, srcreg);
12760 /* In case we don't know anything about the alignment, default to
12761 the library version, since it is usually equally fast and results in
12762 shorter code.
12764 Also emit a call when we know that the count is large and call overhead
12765 will not be important. */
12766 if (!TARGET_INLINE_ALL_STRINGOPS
12767 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12768 return 0;
12770 if (TARGET_SINGLE_STRINGOP)
12771 emit_insn (gen_cld ());
12773 countreg2 = gen_reg_rtx (Pmode);
12774 countreg = copy_to_mode_reg (counter_mode, count_exp);
12776 /* We don't use loops to align destination and to copy parts smaller
12777 than 4 bytes, because gcc is able to optimize such code better (in
12778 the case the destination or the count really is aligned, gcc is often
12779 able to predict the branches) and also it is friendlier to the
12780 hardware branch prediction.
12782 Using loops is beneficial for the generic case, because we can
12783 handle small counts using the loops. Many CPUs (such as Athlon)
12784 have large REP prefix setup costs.
12786 This is quite costly. Maybe we can revisit this decision later or
12787 add some customizability to this code. */
12789 if (count == 0 && align < desired_alignment)
12791 label = gen_label_rtx ();
12792 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12793 LEU, 0, counter_mode, 1, label);
12795 if (align <= 1)
12797 rtx label = ix86_expand_aligntest (destreg, 1);
12798 srcmem = change_address (src, QImode, srcreg);
12799 dstmem = change_address (dst, QImode, destreg);
12800 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12801 ix86_adjust_counter (countreg, 1);
12802 emit_label (label);
12803 LABEL_NUSES (label) = 1;
12805 if (align <= 2)
12807 rtx label = ix86_expand_aligntest (destreg, 2);
12808 srcmem = change_address (src, HImode, srcreg);
12809 dstmem = change_address (dst, HImode, destreg);
12810 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12811 ix86_adjust_counter (countreg, 2);
12812 emit_label (label);
12813 LABEL_NUSES (label) = 1;
12815 if (align <= 4 && desired_alignment > 4)
12817 rtx label = ix86_expand_aligntest (destreg, 4);
12818 srcmem = change_address (src, SImode, srcreg);
12819 dstmem = change_address (dst, SImode, destreg);
12820 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12821 ix86_adjust_counter (countreg, 4);
12822 emit_label (label);
12823 LABEL_NUSES (label) = 1;
12826 if (label && desired_alignment > 4 && !TARGET_64BIT)
12828 emit_label (label);
12829 LABEL_NUSES (label) = 1;
12830 label = NULL_RTX;
12832 if (!TARGET_SINGLE_STRINGOP)
12833 emit_insn (gen_cld ());
12834 if (TARGET_64BIT)
12836 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12837 GEN_INT (3)));
12838 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12840 else
12842 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12843 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12845 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12846 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12847 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12848 countreg2, destexp, srcexp));
12850 if (label)
12852 emit_label (label);
12853 LABEL_NUSES (label) = 1;
12855 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12857 srcmem = change_address (src, SImode, srcreg);
12858 dstmem = change_address (dst, SImode, destreg);
12859 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12861 if ((align <= 4 || count == 0) && TARGET_64BIT)
12863 rtx label = ix86_expand_aligntest (countreg, 4);
12864 srcmem = change_address (src, SImode, srcreg);
12865 dstmem = change_address (dst, SImode, destreg);
12866 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12867 emit_label (label);
12868 LABEL_NUSES (label) = 1;
12870 if (align > 2 && count != 0 && (count & 2))
12872 srcmem = change_address (src, HImode, srcreg);
12873 dstmem = change_address (dst, HImode, destreg);
12874 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12876 if (align <= 2 || count == 0)
12878 rtx label = ix86_expand_aligntest (countreg, 2);
12879 srcmem = change_address (src, HImode, srcreg);
12880 dstmem = change_address (dst, HImode, destreg);
12881 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12882 emit_label (label);
12883 LABEL_NUSES (label) = 1;
12885 if (align > 1 && count != 0 && (count & 1))
12887 srcmem = change_address (src, QImode, srcreg);
12888 dstmem = change_address (dst, QImode, destreg);
12889 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12891 if (align <= 1 || count == 0)
12893 rtx label = ix86_expand_aligntest (countreg, 1);
12894 srcmem = change_address (src, QImode, srcreg);
12895 dstmem = change_address (dst, QImode, destreg);
12896 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12897 emit_label (label);
12898 LABEL_NUSES (label) = 1;
12902 return 1;
12905 /* Expand string clear operation (bzero). Use i386 string operations when
12906 profitable. ix86_expand_movmem contains similar code. */
12908 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12910 rtx destreg, zeroreg, countreg, destexp;
12911 enum machine_mode counter_mode;
12912 HOST_WIDE_INT align = 0;
12913 unsigned HOST_WIDE_INT count = 0;
12915 if (GET_CODE (align_exp) == CONST_INT)
12916 align = INTVAL (align_exp);
12918 /* Can't use any of this if the user has appropriated esi. */
12919 if (global_regs[4])
12920 return 0;
12922 /* This simple hack avoids all inlining code and simplifies code below. */
12923 if (!TARGET_ALIGN_STRINGOPS)
12924 align = 32;
12926 if (GET_CODE (count_exp) == CONST_INT)
12928 count = INTVAL (count_exp);
12929 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12930 return 0;
12932 /* Figure out proper mode for counter. For 32bits it is always SImode,
12933 for 64bits use SImode when possible, otherwise DImode.
12934 Set count to number of bytes copied when known at compile time. */
12935 if (!TARGET_64BIT
12936 || GET_MODE (count_exp) == SImode
12937 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12938 counter_mode = SImode;
12939 else
12940 counter_mode = DImode;
12942 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12943 if (destreg != XEXP (dst, 0))
12944 dst = replace_equiv_address_nv (dst, destreg);
12947 /* When optimizing for size, emit a simple rep ; stosb instruction for
12948 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12949 sequence is 7 bytes long, so if optimizing for size and the count is
12950 small enough that some stosl, stosw and stosb instructions without
12951 rep are shorter, fall back into the next if. */
12953 if ((!optimize || optimize_size)
12954 && (count == 0
12955 || ((count & 0x03)
12956 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12958 emit_insn (gen_cld ());
12960 countreg = ix86_zero_extend_to_Pmode (count_exp);
12961 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12962 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12963 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12965 else if (count != 0
12966 && (align >= 8
12967 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12968 || optimize_size || count < (unsigned int) 64))
12970 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12971 unsigned HOST_WIDE_INT offset = 0;
12973 emit_insn (gen_cld ());
12975 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12976 if (count & ~(size - 1))
12978 unsigned HOST_WIDE_INT repcount;
12979 unsigned int max_nonrep;
12981 repcount = count >> (size == 4 ? 2 : 3);
12982 if (!TARGET_64BIT)
12983 repcount &= 0x3fffffff;
12985 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12986 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12987 bytes. In both cases the latter seems to be faster for small
12988 values of N. */
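/* Hence the cut-off just below: up to 7 inline stosl (7 bytes) or 4
   inline stosq (8 bytes) never encode larger than the corresponding
   mov $N, %ecx; rep; stos sequence; Pentium 4 and Nocona lower the
   threshold to 3, presumably because rep stos fares relatively better
   there.  */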
12989 max_nonrep = size == 4 ? 7 : 4;
12990 if (!optimize_size)
12991 switch (ix86_tune)
12993 case PROCESSOR_PENTIUM4:
12994 case PROCESSOR_NOCONA:
12995 max_nonrep = 3;
12996 break;
12997 default:
12998 break;
13001 if (repcount <= max_nonrep)
13002 while (repcount-- > 0)
13004 rtx mem = adjust_automodify_address_nv (dst,
13005 GET_MODE (zeroreg),
13006 destreg, offset);
13007 emit_insn (gen_strset (destreg, mem, zeroreg));
13008 offset += size;
13010 else
13012 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13013 countreg = ix86_zero_extend_to_Pmode (countreg);
13014 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13015 GEN_INT (size == 4 ? 2 : 3));
13016 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13017 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13018 destexp));
13019 offset = count & ~(size - 1);
13022 if (size == 8 && (count & 0x04))
13024 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13025 offset);
13026 emit_insn (gen_strset (destreg, mem,
13027 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13028 offset += 4;
13030 if (count & 0x02)
13032 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13033 offset);
13034 emit_insn (gen_strset (destreg, mem,
13035 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13036 offset += 2;
13038 if (count & 0x01)
13040 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13041 offset);
13042 emit_insn (gen_strset (destreg, mem,
13043 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13046 else
13048 rtx countreg2;
13049 rtx label = NULL;
13050 /* Compute desired alignment of the string operation. */
13051 int desired_alignment = (TARGET_PENTIUMPRO
13052 && (count == 0 || count >= (unsigned int) 260)
13053 ? 8 : UNITS_PER_WORD);
13055 /* In case we don't know anything about the alignment, default to
13056 the library version, since it is usually equally fast and results in
13057 shorter code.
13059 Also emit a call when we know that the count is large and call overhead
13060 will not be important. */
13061 if (!TARGET_INLINE_ALL_STRINGOPS
13062 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13063 return 0;
13065 if (TARGET_SINGLE_STRINGOP)
13066 emit_insn (gen_cld ());
13068 countreg2 = gen_reg_rtx (Pmode);
13069 countreg = copy_to_mode_reg (counter_mode, count_exp);
13070 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13071 /* Get rid of MEM_OFFSET, it won't be accurate. */
13072 dst = change_address (dst, BLKmode, destreg);
13074 if (count == 0 && align < desired_alignment)
13076 label = gen_label_rtx ();
13077 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13078 LEU, 0, counter_mode, 1, label);
13080 if (align <= 1)
13082 rtx label = ix86_expand_aligntest (destreg, 1);
13083 emit_insn (gen_strset (destreg, dst,
13084 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13085 ix86_adjust_counter (countreg, 1);
13086 emit_label (label);
13087 LABEL_NUSES (label) = 1;
13089 if (align <= 2)
13091 rtx label = ix86_expand_aligntest (destreg, 2);
13092 emit_insn (gen_strset (destreg, dst,
13093 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13094 ix86_adjust_counter (countreg, 2);
13095 emit_label (label);
13096 LABEL_NUSES (label) = 1;
13098 if (align <= 4 && desired_alignment > 4)
13100 rtx label = ix86_expand_aligntest (destreg, 4);
13101 emit_insn (gen_strset (destreg, dst,
13102 (TARGET_64BIT
13103 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13104 : zeroreg)));
13105 ix86_adjust_counter (countreg, 4);
13106 emit_label (label);
13107 LABEL_NUSES (label) = 1;
13110 if (label && desired_alignment > 4 && !TARGET_64BIT)
13112 emit_label (label);
13113 LABEL_NUSES (label) = 1;
13114 label = NULL_RTX;
13117 if (!TARGET_SINGLE_STRINGOP)
13118 emit_insn (gen_cld ());
13119 if (TARGET_64BIT)
13121 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13122 GEN_INT (3)));
13123 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13125 else
13127 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13128 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13130 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13131 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13133 if (label)
13135 emit_label (label);
13136 LABEL_NUSES (label) = 1;
13139 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13140 emit_insn (gen_strset (destreg, dst,
13141 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13142 if (TARGET_64BIT && (align <= 4 || count == 0))
13144 rtx label = ix86_expand_aligntest (countreg, 4);
13145 emit_insn (gen_strset (destreg, dst,
13146 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13147 emit_label (label);
13148 LABEL_NUSES (label) = 1;
13150 if (align > 2 && count != 0 && (count & 2))
13151 emit_insn (gen_strset (destreg, dst,
13152 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13153 if (align <= 2 || count == 0)
13155 rtx label = ix86_expand_aligntest (countreg, 2);
13156 emit_insn (gen_strset (destreg, dst,
13157 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13158 emit_label (label);
13159 LABEL_NUSES (label) = 1;
13161 if (align > 1 && count != 0 && (count & 1))
13162 emit_insn (gen_strset (destreg, dst,
13163 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13164 if (align <= 1 || count == 0)
13166 rtx label = ix86_expand_aligntest (countreg, 1);
13167 emit_insn (gen_strset (destreg, dst,
13168 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13169 emit_label (label);
13170 LABEL_NUSES (label) = 1;
13173 return 1;
13176 /* Expand strlen. */
13178 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13180 rtx addr, scratch1, scratch2, scratch3, scratch4;
13182 /* The generic case of the strlen expander is long. Avoid expanding it
13183 unless TARGET_INLINE_ALL_STRINGOPS. */
13185 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13186 && !TARGET_INLINE_ALL_STRINGOPS
13187 && !optimize_size
13188 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13189 return 0;
13191 addr = force_reg (Pmode, XEXP (src, 0));
13192 scratch1 = gen_reg_rtx (Pmode);
13194 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13195 && !optimize_size)
13197 /* Well it seems that some optimizer does not combine a call like
13198 foo(strlen(bar), strlen(bar));
13199 when the move and the subtraction are done here. It does calculate
13200 the length just once when these instructions are done inside of
13201 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13202 often used and I use one fewer register for the lifetime of
13203 output_strlen_unroll() this is better. */
13205 emit_move_insn (out, addr);
13207 ix86_expand_strlensi_unroll_1 (out, src, align);
13209 /* strlensi_unroll_1 returns the address of the zero at the end of
13210 the string, like memchr(), so compute the length by subtracting
13211 the start address. */
13212 if (TARGET_64BIT)
13213 emit_insn (gen_subdi3 (out, out, addr));
13214 else
13215 emit_insn (gen_subsi3 (out, out, addr));
13217 else
13219 rtx unspec;
13220 scratch2 = gen_reg_rtx (Pmode);
13221 scratch3 = gen_reg_rtx (Pmode);
13222 scratch4 = force_reg (Pmode, constm1_rtx);
13224 emit_move_insn (scratch3, addr);
13225 eoschar = force_reg (QImode, eoschar);
13227 emit_insn (gen_cld ());
13228 src = replace_equiv_address_nv (src, scratch3);
13230 /* If .md starts supporting :P, this can be done in .md. */
13231 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13232 scratch4), UNSPEC_SCAS);
13233 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13234 if (TARGET_64BIT)
13236 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13237 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13239 else
13241 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13242 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
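/* Rough arithmetic behind the not/add pair above: the scan starts with
   the count register at -1 and decrements it once per byte examined,
   including the terminator, leaving -(n + 2) for a string of length n;
   ~(-(n + 2)) is n + 1, and adding -1 yields n.  */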
13245 return 1;
13248 /* Expand the appropriate insns for doing strlen if not just doing
13249 repnz; scasb
13251 out = result, initialized with the start address
13252 align_rtx = alignment of the address.
13253 scratch = scratch register, initialized with the start address when
13254 not aligned, otherwise undefined
13256 This is just the body. It needs the initializations mentioned above and
13257 some address computing at the end. These things are done in i386.md. */
13259 static void
13260 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13262 int align;
13263 rtx tmp;
13264 rtx align_2_label = NULL_RTX;
13265 rtx align_3_label = NULL_RTX;
13266 rtx align_4_label = gen_label_rtx ();
13267 rtx end_0_label = gen_label_rtx ();
13268 rtx mem;
13269 rtx tmpreg = gen_reg_rtx (SImode);
13270 rtx scratch = gen_reg_rtx (SImode);
13271 rtx cmp;
13273 align = 0;
13274 if (GET_CODE (align_rtx) == CONST_INT)
13275 align = INTVAL (align_rtx);
13277 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13279 /* Is there a known alignment and is it less than 4? */
13280 if (align < 4)
13282 rtx scratch1 = gen_reg_rtx (Pmode);
13283 emit_move_insn (scratch1, out);
13284 /* Is there a known alignment and is it not 2? */
13285 if (align != 2)
13287 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13288 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13290 /* Leave just the 3 lower bits. */
13291 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13292 NULL_RTX, 0, OPTAB_WIDEN);
13294 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13295 Pmode, 1, align_4_label);
13296 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13297 Pmode, 1, align_2_label);
13298 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13299 Pmode, 1, align_3_label);
13301 else
13303 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13304 check whether it is aligned to 4 bytes. */
13306 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13307 NULL_RTX, 0, OPTAB_WIDEN);
13309 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13310 Pmode, 1, align_4_label);
13313 mem = change_address (src, QImode, out);
13315 /* Now compare the bytes. */
13317 /* Compare the first n unaligned bytes one byte at a time. */
13318 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13319 QImode, 1, end_0_label);
13321 /* Increment the address. */
13322 if (TARGET_64BIT)
13323 emit_insn (gen_adddi3 (out, out, const1_rtx));
13324 else
13325 emit_insn (gen_addsi3 (out, out, const1_rtx));
13327 /* Not needed with an alignment of 2 */
13328 if (align != 2)
13330 emit_label (align_2_label);
13332 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13333 end_0_label);
13335 if (TARGET_64BIT)
13336 emit_insn (gen_adddi3 (out, out, const1_rtx));
13337 else
13338 emit_insn (gen_addsi3 (out, out, const1_rtx));
13340 emit_label (align_3_label);
13343 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13344 end_0_label);
13346 if (TARGET_64BIT)
13347 emit_insn (gen_adddi3 (out, out, const1_rtx));
13348 else
13349 emit_insn (gen_addsi3 (out, out, const1_rtx));
13352 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13353 align this loop: it only bloats the code and does not help
13354 speed. */
13355 emit_label (align_4_label);
13357 mem = change_address (src, SImode, out);
13358 emit_move_insn (scratch, mem);
13359 if (TARGET_64BIT)
13360 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13361 else
13362 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13364 /* This formula yields a nonzero result iff one of the bytes is zero.
13365 This saves three branches inside the loop and many cycles. */
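/* Spelled out, the test computed below is

       ((x - 0x01010101) & ~x & 0x80808080) != 0

   which is nonzero exactly when some byte of x is zero; e.g. for
   x = 0x61006263 it gives 0x00800000, while for x = 0x61626364 (no zero
   byte) it gives 0.  */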
13367 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13368 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13369 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13370 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13371 gen_int_mode (0x80808080, SImode)));
13372 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13373 align_4_label);
13375 if (TARGET_CMOVE)
13377 rtx reg = gen_reg_rtx (SImode);
13378 rtx reg2 = gen_reg_rtx (Pmode);
13379 emit_move_insn (reg, tmpreg);
13380 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13382 /* If zero is not in the first two bytes, move two bytes forward. */
13383 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13384 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13385 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13386 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13387 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13388 reg,
13389 tmpreg)));
13390 /* Emit lea manually to avoid clobbering of flags. */
13391 emit_insn (gen_rtx_SET (SImode, reg2,
13392 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13394 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13395 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13396 emit_insn (gen_rtx_SET (VOIDmode, out,
13397 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13398 reg2,
13399 out)));
13402 else
13404 rtx end_2_label = gen_label_rtx ();
13405 /* Is zero in the first two bytes? */
13407 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13408 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13409 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13410 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13411 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13412 pc_rtx);
13413 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13414 JUMP_LABEL (tmp) = end_2_label;
13416 /* Not in the first two. Move two bytes forward. */
13417 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13418 if (TARGET_64BIT)
13419 emit_insn (gen_adddi3 (out, out, const2_rtx));
13420 else
13421 emit_insn (gen_addsi3 (out, out, const2_rtx));
13423 emit_label (end_2_label);
13427 /* Avoid a branch when fixing up the final byte position. */
13428 tmpreg = gen_lowpart (QImode, tmpreg);
13429 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13430 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13431 if (TARGET_64BIT)
13432 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13433 else
13434 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13436 emit_label (end_0_label);
13439 void
13440 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13441 rtx callarg2 ATTRIBUTE_UNUSED,
13442 rtx pop, int sibcall)
13444 rtx use = NULL, call;
13446 if (pop == const0_rtx)
13447 pop = NULL;
13448 gcc_assert (!TARGET_64BIT || !pop);
13450 if (TARGET_MACHO && !TARGET_64BIT)
13452 #if TARGET_MACHO
13453 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13454 fnaddr = machopic_indirect_call_target (fnaddr);
13455 #endif
13457 else
13459 /* Static functions and indirect calls don't need the pic register. */
13460 if (! TARGET_64BIT && flag_pic
13461 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13462 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13463 use_reg (&use, pic_offset_table_rtx);
13466 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13468 rtx al = gen_rtx_REG (QImode, 0);
13469 emit_move_insn (al, callarg2);
13470 use_reg (&use, al);
13473 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13475 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13476 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13478 if (sibcall && TARGET_64BIT
13479 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13481 rtx addr;
13482 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13483 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13484 emit_move_insn (fnaddr, addr);
13485 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13488 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13489 if (retval)
13490 call = gen_rtx_SET (VOIDmode, retval, call);
13491 if (pop)
13493 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13494 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13495 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13498 call = emit_call_insn (call);
13499 if (use)
13500 CALL_INSN_FUNCTION_USAGE (call) = use;
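 /* For illustration only (not from the GCC sources): a 32-bit call to "foo"
    returning a value in %eax and popping 4 bytes of arguments is emitted
    roughly as

      (parallel [(set (reg:SI ax)
                      (call (mem:QI (symbol_ref "foo")) (const_int 0)))
                 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 4)))])

    where the outer SET comes from RETVAL and the stack adjustment from POP.  */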
13504 /* Clear stack slot assignments remembered from previous functions.
13505 This is called from INIT_EXPANDERS once before RTL is emitted for each
13506 function. */
13508 static struct machine_function *
13509 ix86_init_machine_status (void)
13511 struct machine_function *f;
13513 f = ggc_alloc_cleared (sizeof (struct machine_function));
13514 f->use_fast_prologue_epilogue_nregs = -1;
13515 f->tls_descriptor_call_expanded_p = 0;
13517 return f;
13520 /* Return a MEM corresponding to a stack slot with mode MODE.
13521 Allocate a new slot if necessary.
13523 The RTL for a function can have several slots available: N is
13524 which slot to use. */
13527 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13529 struct stack_local_entry *s;
13531 gcc_assert (n < MAX_386_STACK_LOCALS);
13533 for (s = ix86_stack_locals; s; s = s->next)
13534 if (s->mode == mode && s->n == n)
13535 return copy_rtx (s->rtl);
13537 s = (struct stack_local_entry *)
13538 ggc_alloc (sizeof (struct stack_local_entry));
13539 s->n = n;
13540 s->mode = mode;
13541 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13543 s->next = ix86_stack_locals;
13544 ix86_stack_locals = s;
13545 return s->rtl;
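 /* Minimal usage sketch (hypothetical, not taken from this file): asking for
    the same (mode, slot) pair twice hands back a copy of the same stack MEM,
    so callers can share one scratch slot instead of growing the frame.  */
#if 0
 /* SLOT_TEMP is one of the enum ix86_stack_slot values; "value" is a
    hypothetical rtx to be spilled.  */
 rtx slot = assign_386_stack_local (SImode, SLOT_TEMP);
 emit_move_insn (slot, value);
#endif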
13548 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13550 static GTY(()) rtx ix86_tls_symbol;
13552 ix86_tls_get_addr (void)
13555 if (!ix86_tls_symbol)
13557 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13558 (TARGET_ANY_GNU_TLS
13559 && !TARGET_64BIT)
13560 ? "___tls_get_addr"
13561 : "__tls_get_addr");
13564 return ix86_tls_symbol;
13567 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13569 static GTY(()) rtx ix86_tls_module_base_symbol;
13571 ix86_tls_module_base (void)
13574 if (!ix86_tls_module_base_symbol)
13576 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13577 "_TLS_MODULE_BASE_");
13578 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13579 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13582 return ix86_tls_module_base_symbol;
13585 /* Calculate the length of the memory address in the instruction
13586 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13589 memory_address_length (rtx addr)
13591 struct ix86_address parts;
13592 rtx base, index, disp;
13593 int len;
13594 int ok;
13596 if (GET_CODE (addr) == PRE_DEC
13597 || GET_CODE (addr) == POST_INC
13598 || GET_CODE (addr) == PRE_MODIFY
13599 || GET_CODE (addr) == POST_MODIFY)
13600 return 0;
13602 ok = ix86_decompose_address (addr, &parts);
13603 gcc_assert (ok);
13605 if (parts.base && GET_CODE (parts.base) == SUBREG)
13606 parts.base = SUBREG_REG (parts.base);
13607 if (parts.index && GET_CODE (parts.index) == SUBREG)
13608 parts.index = SUBREG_REG (parts.index);
13610 base = parts.base;
13611 index = parts.index;
13612 disp = parts.disp;
13613 len = 0;
13615 /* Rule of thumb:
13616 - esp as the base always wants an index,
13617 - ebp as the base always wants a displacement. */
13619 /* Register Indirect. */
13620 if (base && !index && !disp)
13622 /* esp (for its index) and ebp (for its displacement) need
13623 the two-byte modrm form. */
13624 if (addr == stack_pointer_rtx
13625 || addr == arg_pointer_rtx
13626 || addr == frame_pointer_rtx
13627 || addr == hard_frame_pointer_rtx)
13628 len = 1;
13631 /* Direct Addressing. */
13632 else if (disp && !base && !index)
13633 len = 4;
13635 else
13637 /* Find the length of the displacement constant. */
13638 if (disp)
13640 if (base && satisfies_constraint_K (disp))
13641 len = 1;
13642 else
13643 len = 4;
13645 /* ebp always wants a displacement. */
13646 else if (base == hard_frame_pointer_rtx)
13647 len = 1;
13649 /* An index requires the two-byte modrm form.... */
13650 if (index
13651 /* ...like esp, which always wants an index. */
13652 || base == stack_pointer_rtx
13653 || base == arg_pointer_rtx
13654 || base == frame_pointer_rtx)
13655 len += 1;
13658 return len;
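 /* Worked examples (illustrative only, not from the GCC sources):
      -4(%ebp)        -> 1   disp8; the modrm byte itself is not counted
      foo             -> 4   disp32 with neither base nor index
      8(%esp)         -> 2   SIB byte forced by %esp as base, plus disp8
      16(%ebx,%ecx,4) -> 2   SIB byte plus disp8  */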
13661 /* Compute the default value for the "length_immediate" attribute. When
13662 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
13664 ix86_attr_length_immediate_default (rtx insn, int shortform)
13666 int len = 0;
13667 int i;
13668 extract_insn_cached (insn);
13669 for (i = recog_data.n_operands - 1; i >= 0; --i)
13670 if (CONSTANT_P (recog_data.operand[i]))
13672 gcc_assert (!len);
13673 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13674 len = 1;
13675 else
13677 switch (get_attr_mode (insn))
13679 case MODE_QI:
13680 len+=1;
13681 break;
13682 case MODE_HI:
13683 len+=2;
13684 break;
13685 case MODE_SI:
13686 len+=4;
13687 break;
13688 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13689 case MODE_DI:
13690 len+=4;
13691 break;
13692 default:
13693 fatal_insn ("unknown insn mode", insn);
13697 return len;
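 /* For example (illustrative only): "addl $100000, %eax" contributes a
    4-byte immediate, while with SHORTFORM set "addl $3, %eax" matches
    constraint K and contributes only the 1-byte sign-extended form.  */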
13699 /* Compute default value for "length_address" attribute. */
13701 ix86_attr_length_address_default (rtx insn)
13703 int i;
13705 if (get_attr_type (insn) == TYPE_LEA)
13707 rtx set = PATTERN (insn);
13709 if (GET_CODE (set) == PARALLEL)
13710 set = XVECEXP (set, 0, 0);
13712 gcc_assert (GET_CODE (set) == SET);
13714 return memory_address_length (SET_SRC (set));
13717 extract_insn_cached (insn);
13718 for (i = recog_data.n_operands - 1; i >= 0; --i)
13719 if (GET_CODE (recog_data.operand[i]) == MEM)
13721 return memory_address_length (XEXP (recog_data.operand[i], 0));
13722 break;
13724 return 0;
13727 /* Return the maximum number of instructions a cpu can issue. */
13729 static int
13730 ix86_issue_rate (void)
13732 switch (ix86_tune)
13734 case PROCESSOR_PENTIUM:
13735 case PROCESSOR_K6:
13736 return 2;
13738 case PROCESSOR_PENTIUMPRO:
13739 case PROCESSOR_PENTIUM4:
13740 case PROCESSOR_ATHLON:
13741 case PROCESSOR_K8:
13742 case PROCESSOR_NOCONA:
13743 case PROCESSOR_GENERIC32:
13744 case PROCESSOR_GENERIC64:
13745 return 3;
13747 default:
13748 return 1;
13752 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13753 by DEP_INSN and nothing else set by DEP_INSN. */
13755 static int
13756 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13758 rtx set, set2;
13760 /* Simplify the test for uninteresting insns. */
13761 if (insn_type != TYPE_SETCC
13762 && insn_type != TYPE_ICMOV
13763 && insn_type != TYPE_FCMOV
13764 && insn_type != TYPE_IBR)
13765 return 0;
13767 if ((set = single_set (dep_insn)) != 0)
13769 set = SET_DEST (set);
13770 set2 = NULL_RTX;
13772 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13773 && XVECLEN (PATTERN (dep_insn), 0) == 2
13774 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13775 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13777 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13778 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13780 else
13781 return 0;
13783 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13784 return 0;
13786 /* This test is true if the dependent insn reads the flags but
13787 not any other potentially set register. */
13788 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13789 return 0;
13791 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13792 return 0;
13794 return 1;
13797 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13798 address with operands set by DEP_INSN. */
13800 static int
13801 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13803 rtx addr;
13805 if (insn_type == TYPE_LEA
13806 && TARGET_PENTIUM)
13808 addr = PATTERN (insn);
13810 if (GET_CODE (addr) == PARALLEL)
13811 addr = XVECEXP (addr, 0, 0);
13813 gcc_assert (GET_CODE (addr) == SET);
13815 addr = SET_SRC (addr);
13817 else
13819 int i;
13820 extract_insn_cached (insn);
13821 for (i = recog_data.n_operands - 1; i >= 0; --i)
13822 if (GET_CODE (recog_data.operand[i]) == MEM)
13824 addr = XEXP (recog_data.operand[i], 0);
13825 goto found;
13827 return 0;
13828 found:;
13831 return modified_in_p (addr, dep_insn);
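 /* Illustration, not from the GCC sources: on the original Pentium the pair

        addl $4, %ebx
        movl (%ebx), %eax

    suffers an Address Generation Interlock because the load's address uses a
    register written by the immediately preceding instruction; this routine
    detects that case so ix86_adjust_cost can charge the extra cycle.  */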
13834 static int
13835 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13837 enum attr_type insn_type, dep_insn_type;
13838 enum attr_memory memory;
13839 rtx set, set2;
13840 int dep_insn_code_number;
13842 /* Anti and output dependencies have zero cost on all CPUs. */
13843 if (REG_NOTE_KIND (link) != 0)
13844 return 0;
13846 dep_insn_code_number = recog_memoized (dep_insn);
13848 /* If we can't recognize the insns, we can't really do anything. */
13849 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13850 return cost;
13852 insn_type = get_attr_type (insn);
13853 dep_insn_type = get_attr_type (dep_insn);
13855 switch (ix86_tune)
13857 case PROCESSOR_PENTIUM:
13858 /* Address Generation Interlock adds a cycle of latency. */
13859 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13860 cost += 1;
13862 /* ??? Compares pair with jump/setcc. */
13863 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13864 cost = 0;
13866 /* Floating point stores require value to be ready one cycle earlier. */
13867 if (insn_type == TYPE_FMOV
13868 && get_attr_memory (insn) == MEMORY_STORE
13869 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13870 cost += 1;
13871 break;
13873 case PROCESSOR_PENTIUMPRO:
13874 memory = get_attr_memory (insn);
13876 /* INT->FP conversion is expensive. */
13877 if (get_attr_fp_int_src (dep_insn))
13878 cost += 5;
13880 /* There is one cycle extra latency between an FP op and a store. */
13881 if (insn_type == TYPE_FMOV
13882 && (set = single_set (dep_insn)) != NULL_RTX
13883 && (set2 = single_set (insn)) != NULL_RTX
13884 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13885 && GET_CODE (SET_DEST (set2)) == MEM)
13886 cost += 1;
13888 /* Show the ability of the reorder buffer to hide the latency of a load
13889 by executing it in parallel with the previous instruction when the
13890 previous instruction is not needed to compute the address. */
13891 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13892 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13894 /* Claim moves to take one cycle, as the core can issue one load
13895 at a time and the next load can start a cycle later. */
13896 if (dep_insn_type == TYPE_IMOV
13897 || dep_insn_type == TYPE_FMOV)
13898 cost = 1;
13899 else if (cost > 1)
13900 cost--;
13902 break;
13904 case PROCESSOR_K6:
13905 memory = get_attr_memory (insn);
13907 /* The esp dependency is resolved before the instruction is really
13908 finished. */
13909 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13910 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13911 return 1;
13913 /* INT->FP conversion is expensive. */
13914 if (get_attr_fp_int_src (dep_insn))
13915 cost += 5;
13917 /* Show the ability of the reorder buffer to hide the latency of a load
13918 by executing it in parallel with the previous instruction when the
13919 previous instruction is not needed to compute the address. */
13920 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13921 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13923 /* Claim moves to take one cycle, as the core can issue one load
13924 at a time and the next load can start a cycle later. */
13925 if (dep_insn_type == TYPE_IMOV
13926 || dep_insn_type == TYPE_FMOV)
13927 cost = 1;
13928 else if (cost > 2)
13929 cost -= 2;
13930 else
13931 cost = 1;
13933 break;
13935 case PROCESSOR_ATHLON:
13936 case PROCESSOR_K8:
13937 case PROCESSOR_GENERIC32:
13938 case PROCESSOR_GENERIC64:
13939 memory = get_attr_memory (insn);
13941 /* Show the ability of the reorder buffer to hide the latency of a load
13942 by executing it in parallel with the previous instruction when the
13943 previous instruction is not needed to compute the address. */
13944 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13945 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13947 enum attr_unit unit = get_attr_unit (insn);
13948 int loadcost = 3;
13950 /* Because of the difference between the length of integer and
13951 floating unit pipeline preparation stages, the memory operands
13952 for floating point are cheaper.
13954 ??? For Athlon the difference is most probably 2. */
13955 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13956 loadcost = 3;
13957 else
13958 loadcost = TARGET_ATHLON ? 2 : 0;
13960 if (cost >= loadcost)
13961 cost -= loadcost;
13962 else
13963 cost = 0;
13966 default:
13967 break;
13970 return cost;
13973 /* How many alternative schedules to try. This should be as wide as the
13974 scheduling freedom in the DFA, but no wider. Making this value too
13975 large results in extra work for the scheduler. */
13977 static int
13978 ia32_multipass_dfa_lookahead (void)
13980 if (ix86_tune == PROCESSOR_PENTIUM)
13981 return 2;
13983 if (ix86_tune == PROCESSOR_PENTIUMPRO
13984 || ix86_tune == PROCESSOR_K6)
13985 return 1;
13987 else
13988 return 0;
13992 /* Compute the alignment given to a constant that is being placed in memory.
13993 EXP is the constant and ALIGN is the alignment that the object would
13994 ordinarily have.
13995 The value of this function is used instead of that alignment to align
13996 the object. */
13999 ix86_constant_alignment (tree exp, int align)
14001 if (TREE_CODE (exp) == REAL_CST)
14003 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14004 return 64;
14005 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14006 return 128;
14008 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14009 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14010 return BITS_PER_WORD;
14012 return align;
14015 /* Compute the alignment for a static variable.
14016 TYPE is the data type, and ALIGN is the alignment that
14017 the object would ordinarily have. The value of this function is used
14018 instead of that alignment to align the object. */
14021 ix86_data_alignment (tree type, int align)
14023 int max_align = optimize_size ? BITS_PER_WORD : 256;
14025 if (AGGREGATE_TYPE_P (type)
14026 && TYPE_SIZE (type)
14027 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14028 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14029 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14030 && align < max_align)
14031 align = max_align;
14033 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14034 to a 16-byte boundary. */
14035 if (TARGET_64BIT)
14037 if (AGGREGATE_TYPE_P (type)
14038 && TYPE_SIZE (type)
14039 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14040 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14041 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14042 return 128;
14045 if (TREE_CODE (type) == ARRAY_TYPE)
14047 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14048 return 64;
14049 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14050 return 128;
14052 else if (TREE_CODE (type) == COMPLEX_TYPE)
14055 if (TYPE_MODE (type) == DCmode && align < 64)
14056 return 64;
14057 if (TYPE_MODE (type) == XCmode && align < 128)
14058 return 128;
14060 else if ((TREE_CODE (type) == RECORD_TYPE
14061 || TREE_CODE (type) == UNION_TYPE
14062 || TREE_CODE (type) == QUAL_UNION_TYPE)
14063 && TYPE_FIELDS (type))
14065 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14066 return 64;
14067 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14068 return 128;
14070 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14071 || TREE_CODE (type) == INTEGER_TYPE)
14073 if (TYPE_MODE (type) == DFmode && align < 64)
14074 return 64;
14075 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14076 return 128;
14079 return align;
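 /* For instance (illustrative only): with the rules above, a file-scope
    "char buf[20];" gets 128-bit alignment on x86-64 because the array is
    larger than 16 bytes, and a file-scope "double d;" is raised to 64-bit
    alignment even on ia32, where the type's default alignment is 32 bits.  */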
14082 /* Compute the alignment for a local variable.
14083 TYPE is the data type, and ALIGN is the alignment that
14084 the object would ordinarily have. The value of this macro is used
14085 instead of that alignment to align the object. */
14088 ix86_local_alignment (tree type, int align)
14090 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14091 to a 16-byte boundary. */
14092 if (TARGET_64BIT)
14094 if (AGGREGATE_TYPE_P (type)
14095 && TYPE_SIZE (type)
14096 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14097 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14098 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14099 return 128;
14101 if (TREE_CODE (type) == ARRAY_TYPE)
14103 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14104 return 64;
14105 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14106 return 128;
14108 else if (TREE_CODE (type) == COMPLEX_TYPE)
14110 if (TYPE_MODE (type) == DCmode && align < 64)
14111 return 64;
14112 if (TYPE_MODE (type) == XCmode && align < 128)
14113 return 128;
14115 else if ((TREE_CODE (type) == RECORD_TYPE
14116 || TREE_CODE (type) == UNION_TYPE
14117 || TREE_CODE (type) == QUAL_UNION_TYPE)
14118 && TYPE_FIELDS (type))
14120 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14121 return 64;
14122 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14123 return 128;
14125 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14126 || TREE_CODE (type) == INTEGER_TYPE)
14129 if (TYPE_MODE (type) == DFmode && align < 64)
14130 return 64;
14131 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14132 return 128;
14134 return align;
14137 /* Emit RTL insns to initialize the variable parts of a trampoline.
14138 FNADDR is an RTX for the address of the function's pure code.
14139 CXT is an RTX for the static chain value for the function. */
14140 void
14141 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14143 if (!TARGET_64BIT)
14145 /* Compute offset from the end of the jmp to the target function. */
14146 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14147 plus_constant (tramp, 10),
14148 NULL_RTX, 1, OPTAB_DIRECT);
14149 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14150 gen_int_mode (0xb9, QImode));
14151 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14152 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14153 gen_int_mode (0xe9, QImode));
14154 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14156 else
14158 int offset = 0;
14159 /* Try to load the address using the shorter movl instead of movabs.
14160 We may want to support movq for kernel mode, but the kernel does not use
14161 trampolines at the moment. */
14162 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14164 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14165 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14166 gen_int_mode (0xbb41, HImode));
14167 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14168 gen_lowpart (SImode, fnaddr));
14169 offset += 6;
14171 else
14173 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14174 gen_int_mode (0xbb49, HImode));
14175 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14176 fnaddr);
14177 offset += 10;
14179 /* Load static chain using movabs to r10. */
14180 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14181 gen_int_mode (0xba49, HImode));
14182 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14183 cxt);
14184 offset += 10;
14185 /* Jump to r11. */
14186 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14187 gen_int_mode (0xff49, HImode));
14188 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14189 gen_int_mode (0xe3, QImode));
14190 offset += 3;
14191 gcc_assert (offset <= TRAMPOLINE_SIZE);
14194 #ifdef ENABLE_EXECUTE_STACK
14195 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14196 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14197 #endif
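 /* Illustration, not from the GCC sources: the 32-bit trampoline built above
    is 10 bytes of code,

        b9 <cxt:4>     movl  $CXT, %ecx     load the static chain register
        e9 <disp:4>    jmp   FNADDR         disp == FNADDR - (TRAMP + 10)

    while the 64-bit variant loads FNADDR into %r11 (41 bb or 49 bb) and CXT
    into %r10 (49 ba), and ends with "jmpq *%r11" (49 ff e3).  */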
14200 /* Codes for all the SSE/MMX builtins. */
14201 enum ix86_builtins
14203 IX86_BUILTIN_ADDPS,
14204 IX86_BUILTIN_ADDSS,
14205 IX86_BUILTIN_DIVPS,
14206 IX86_BUILTIN_DIVSS,
14207 IX86_BUILTIN_MULPS,
14208 IX86_BUILTIN_MULSS,
14209 IX86_BUILTIN_SUBPS,
14210 IX86_BUILTIN_SUBSS,
14212 IX86_BUILTIN_CMPEQPS,
14213 IX86_BUILTIN_CMPLTPS,
14214 IX86_BUILTIN_CMPLEPS,
14215 IX86_BUILTIN_CMPGTPS,
14216 IX86_BUILTIN_CMPGEPS,
14217 IX86_BUILTIN_CMPNEQPS,
14218 IX86_BUILTIN_CMPNLTPS,
14219 IX86_BUILTIN_CMPNLEPS,
14220 IX86_BUILTIN_CMPNGTPS,
14221 IX86_BUILTIN_CMPNGEPS,
14222 IX86_BUILTIN_CMPORDPS,
14223 IX86_BUILTIN_CMPUNORDPS,
14224 IX86_BUILTIN_CMPEQSS,
14225 IX86_BUILTIN_CMPLTSS,
14226 IX86_BUILTIN_CMPLESS,
14227 IX86_BUILTIN_CMPNEQSS,
14228 IX86_BUILTIN_CMPNLTSS,
14229 IX86_BUILTIN_CMPNLESS,
14230 IX86_BUILTIN_CMPNGTSS,
14231 IX86_BUILTIN_CMPNGESS,
14232 IX86_BUILTIN_CMPORDSS,
14233 IX86_BUILTIN_CMPUNORDSS,
14235 IX86_BUILTIN_COMIEQSS,
14236 IX86_BUILTIN_COMILTSS,
14237 IX86_BUILTIN_COMILESS,
14238 IX86_BUILTIN_COMIGTSS,
14239 IX86_BUILTIN_COMIGESS,
14240 IX86_BUILTIN_COMINEQSS,
14241 IX86_BUILTIN_UCOMIEQSS,
14242 IX86_BUILTIN_UCOMILTSS,
14243 IX86_BUILTIN_UCOMILESS,
14244 IX86_BUILTIN_UCOMIGTSS,
14245 IX86_BUILTIN_UCOMIGESS,
14246 IX86_BUILTIN_UCOMINEQSS,
14248 IX86_BUILTIN_CVTPI2PS,
14249 IX86_BUILTIN_CVTPS2PI,
14250 IX86_BUILTIN_CVTSI2SS,
14251 IX86_BUILTIN_CVTSI642SS,
14252 IX86_BUILTIN_CVTSS2SI,
14253 IX86_BUILTIN_CVTSS2SI64,
14254 IX86_BUILTIN_CVTTPS2PI,
14255 IX86_BUILTIN_CVTTSS2SI,
14256 IX86_BUILTIN_CVTTSS2SI64,
14258 IX86_BUILTIN_MAXPS,
14259 IX86_BUILTIN_MAXSS,
14260 IX86_BUILTIN_MINPS,
14261 IX86_BUILTIN_MINSS,
14263 IX86_BUILTIN_LOADUPS,
14264 IX86_BUILTIN_STOREUPS,
14265 IX86_BUILTIN_MOVSS,
14267 IX86_BUILTIN_MOVHLPS,
14268 IX86_BUILTIN_MOVLHPS,
14269 IX86_BUILTIN_LOADHPS,
14270 IX86_BUILTIN_LOADLPS,
14271 IX86_BUILTIN_STOREHPS,
14272 IX86_BUILTIN_STORELPS,
14274 IX86_BUILTIN_MASKMOVQ,
14275 IX86_BUILTIN_MOVMSKPS,
14276 IX86_BUILTIN_PMOVMSKB,
14278 IX86_BUILTIN_MOVNTPS,
14279 IX86_BUILTIN_MOVNTQ,
14281 IX86_BUILTIN_LOADDQU,
14282 IX86_BUILTIN_STOREDQU,
14284 IX86_BUILTIN_PACKSSWB,
14285 IX86_BUILTIN_PACKSSDW,
14286 IX86_BUILTIN_PACKUSWB,
14288 IX86_BUILTIN_PADDB,
14289 IX86_BUILTIN_PADDW,
14290 IX86_BUILTIN_PADDD,
14291 IX86_BUILTIN_PADDQ,
14292 IX86_BUILTIN_PADDSB,
14293 IX86_BUILTIN_PADDSW,
14294 IX86_BUILTIN_PADDUSB,
14295 IX86_BUILTIN_PADDUSW,
14296 IX86_BUILTIN_PSUBB,
14297 IX86_BUILTIN_PSUBW,
14298 IX86_BUILTIN_PSUBD,
14299 IX86_BUILTIN_PSUBQ,
14300 IX86_BUILTIN_PSUBSB,
14301 IX86_BUILTIN_PSUBSW,
14302 IX86_BUILTIN_PSUBUSB,
14303 IX86_BUILTIN_PSUBUSW,
14305 IX86_BUILTIN_PAND,
14306 IX86_BUILTIN_PANDN,
14307 IX86_BUILTIN_POR,
14308 IX86_BUILTIN_PXOR,
14310 IX86_BUILTIN_PAVGB,
14311 IX86_BUILTIN_PAVGW,
14313 IX86_BUILTIN_PCMPEQB,
14314 IX86_BUILTIN_PCMPEQW,
14315 IX86_BUILTIN_PCMPEQD,
14316 IX86_BUILTIN_PCMPGTB,
14317 IX86_BUILTIN_PCMPGTW,
14318 IX86_BUILTIN_PCMPGTD,
14320 IX86_BUILTIN_PMADDWD,
14322 IX86_BUILTIN_PMAXSW,
14323 IX86_BUILTIN_PMAXUB,
14324 IX86_BUILTIN_PMINSW,
14325 IX86_BUILTIN_PMINUB,
14327 IX86_BUILTIN_PMULHUW,
14328 IX86_BUILTIN_PMULHW,
14329 IX86_BUILTIN_PMULLW,
14331 IX86_BUILTIN_PSADBW,
14332 IX86_BUILTIN_PSHUFW,
14334 IX86_BUILTIN_PSLLW,
14335 IX86_BUILTIN_PSLLD,
14336 IX86_BUILTIN_PSLLQ,
14337 IX86_BUILTIN_PSRAW,
14338 IX86_BUILTIN_PSRAD,
14339 IX86_BUILTIN_PSRLW,
14340 IX86_BUILTIN_PSRLD,
14341 IX86_BUILTIN_PSRLQ,
14342 IX86_BUILTIN_PSLLWI,
14343 IX86_BUILTIN_PSLLDI,
14344 IX86_BUILTIN_PSLLQI,
14345 IX86_BUILTIN_PSRAWI,
14346 IX86_BUILTIN_PSRADI,
14347 IX86_BUILTIN_PSRLWI,
14348 IX86_BUILTIN_PSRLDI,
14349 IX86_BUILTIN_PSRLQI,
14351 IX86_BUILTIN_PUNPCKHBW,
14352 IX86_BUILTIN_PUNPCKHWD,
14353 IX86_BUILTIN_PUNPCKHDQ,
14354 IX86_BUILTIN_PUNPCKLBW,
14355 IX86_BUILTIN_PUNPCKLWD,
14356 IX86_BUILTIN_PUNPCKLDQ,
14358 IX86_BUILTIN_SHUFPS,
14360 IX86_BUILTIN_RCPPS,
14361 IX86_BUILTIN_RCPSS,
14362 IX86_BUILTIN_RSQRTPS,
14363 IX86_BUILTIN_RSQRTSS,
14364 IX86_BUILTIN_SQRTPS,
14365 IX86_BUILTIN_SQRTSS,
14367 IX86_BUILTIN_UNPCKHPS,
14368 IX86_BUILTIN_UNPCKLPS,
14370 IX86_BUILTIN_ANDPS,
14371 IX86_BUILTIN_ANDNPS,
14372 IX86_BUILTIN_ORPS,
14373 IX86_BUILTIN_XORPS,
14375 IX86_BUILTIN_EMMS,
14376 IX86_BUILTIN_LDMXCSR,
14377 IX86_BUILTIN_STMXCSR,
14378 IX86_BUILTIN_SFENCE,
14380 /* 3DNow! Original */
14381 IX86_BUILTIN_FEMMS,
14382 IX86_BUILTIN_PAVGUSB,
14383 IX86_BUILTIN_PF2ID,
14384 IX86_BUILTIN_PFACC,
14385 IX86_BUILTIN_PFADD,
14386 IX86_BUILTIN_PFCMPEQ,
14387 IX86_BUILTIN_PFCMPGE,
14388 IX86_BUILTIN_PFCMPGT,
14389 IX86_BUILTIN_PFMAX,
14390 IX86_BUILTIN_PFMIN,
14391 IX86_BUILTIN_PFMUL,
14392 IX86_BUILTIN_PFRCP,
14393 IX86_BUILTIN_PFRCPIT1,
14394 IX86_BUILTIN_PFRCPIT2,
14395 IX86_BUILTIN_PFRSQIT1,
14396 IX86_BUILTIN_PFRSQRT,
14397 IX86_BUILTIN_PFSUB,
14398 IX86_BUILTIN_PFSUBR,
14399 IX86_BUILTIN_PI2FD,
14400 IX86_BUILTIN_PMULHRW,
14402 /* 3DNow! Athlon Extensions */
14403 IX86_BUILTIN_PF2IW,
14404 IX86_BUILTIN_PFNACC,
14405 IX86_BUILTIN_PFPNACC,
14406 IX86_BUILTIN_PI2FW,
14407 IX86_BUILTIN_PSWAPDSI,
14408 IX86_BUILTIN_PSWAPDSF,
14410 /* SSE2 */
14411 IX86_BUILTIN_ADDPD,
14412 IX86_BUILTIN_ADDSD,
14413 IX86_BUILTIN_DIVPD,
14414 IX86_BUILTIN_DIVSD,
14415 IX86_BUILTIN_MULPD,
14416 IX86_BUILTIN_MULSD,
14417 IX86_BUILTIN_SUBPD,
14418 IX86_BUILTIN_SUBSD,
14420 IX86_BUILTIN_CMPEQPD,
14421 IX86_BUILTIN_CMPLTPD,
14422 IX86_BUILTIN_CMPLEPD,
14423 IX86_BUILTIN_CMPGTPD,
14424 IX86_BUILTIN_CMPGEPD,
14425 IX86_BUILTIN_CMPNEQPD,
14426 IX86_BUILTIN_CMPNLTPD,
14427 IX86_BUILTIN_CMPNLEPD,
14428 IX86_BUILTIN_CMPNGTPD,
14429 IX86_BUILTIN_CMPNGEPD,
14430 IX86_BUILTIN_CMPORDPD,
14431 IX86_BUILTIN_CMPUNORDPD,
14432 IX86_BUILTIN_CMPNEPD,
14433 IX86_BUILTIN_CMPEQSD,
14434 IX86_BUILTIN_CMPLTSD,
14435 IX86_BUILTIN_CMPLESD,
14436 IX86_BUILTIN_CMPNEQSD,
14437 IX86_BUILTIN_CMPNLTSD,
14438 IX86_BUILTIN_CMPNLESD,
14439 IX86_BUILTIN_CMPORDSD,
14440 IX86_BUILTIN_CMPUNORDSD,
14441 IX86_BUILTIN_CMPNESD,
14443 IX86_BUILTIN_COMIEQSD,
14444 IX86_BUILTIN_COMILTSD,
14445 IX86_BUILTIN_COMILESD,
14446 IX86_BUILTIN_COMIGTSD,
14447 IX86_BUILTIN_COMIGESD,
14448 IX86_BUILTIN_COMINEQSD,
14449 IX86_BUILTIN_UCOMIEQSD,
14450 IX86_BUILTIN_UCOMILTSD,
14451 IX86_BUILTIN_UCOMILESD,
14452 IX86_BUILTIN_UCOMIGTSD,
14453 IX86_BUILTIN_UCOMIGESD,
14454 IX86_BUILTIN_UCOMINEQSD,
14456 IX86_BUILTIN_MAXPD,
14457 IX86_BUILTIN_MAXSD,
14458 IX86_BUILTIN_MINPD,
14459 IX86_BUILTIN_MINSD,
14461 IX86_BUILTIN_ANDPD,
14462 IX86_BUILTIN_ANDNPD,
14463 IX86_BUILTIN_ORPD,
14464 IX86_BUILTIN_XORPD,
14466 IX86_BUILTIN_SQRTPD,
14467 IX86_BUILTIN_SQRTSD,
14469 IX86_BUILTIN_UNPCKHPD,
14470 IX86_BUILTIN_UNPCKLPD,
14472 IX86_BUILTIN_SHUFPD,
14474 IX86_BUILTIN_LOADUPD,
14475 IX86_BUILTIN_STOREUPD,
14476 IX86_BUILTIN_MOVSD,
14478 IX86_BUILTIN_LOADHPD,
14479 IX86_BUILTIN_LOADLPD,
14481 IX86_BUILTIN_CVTDQ2PD,
14482 IX86_BUILTIN_CVTDQ2PS,
14484 IX86_BUILTIN_CVTPD2DQ,
14485 IX86_BUILTIN_CVTPD2PI,
14486 IX86_BUILTIN_CVTPD2PS,
14487 IX86_BUILTIN_CVTTPD2DQ,
14488 IX86_BUILTIN_CVTTPD2PI,
14490 IX86_BUILTIN_CVTPI2PD,
14491 IX86_BUILTIN_CVTSI2SD,
14492 IX86_BUILTIN_CVTSI642SD,
14494 IX86_BUILTIN_CVTSD2SI,
14495 IX86_BUILTIN_CVTSD2SI64,
14496 IX86_BUILTIN_CVTSD2SS,
14497 IX86_BUILTIN_CVTSS2SD,
14498 IX86_BUILTIN_CVTTSD2SI,
14499 IX86_BUILTIN_CVTTSD2SI64,
14501 IX86_BUILTIN_CVTPS2DQ,
14502 IX86_BUILTIN_CVTPS2PD,
14503 IX86_BUILTIN_CVTTPS2DQ,
14505 IX86_BUILTIN_MOVNTI,
14506 IX86_BUILTIN_MOVNTPD,
14507 IX86_BUILTIN_MOVNTDQ,
14509 /* SSE2 MMX */
14510 IX86_BUILTIN_MASKMOVDQU,
14511 IX86_BUILTIN_MOVMSKPD,
14512 IX86_BUILTIN_PMOVMSKB128,
14514 IX86_BUILTIN_PACKSSWB128,
14515 IX86_BUILTIN_PACKSSDW128,
14516 IX86_BUILTIN_PACKUSWB128,
14518 IX86_BUILTIN_PADDB128,
14519 IX86_BUILTIN_PADDW128,
14520 IX86_BUILTIN_PADDD128,
14521 IX86_BUILTIN_PADDQ128,
14522 IX86_BUILTIN_PADDSB128,
14523 IX86_BUILTIN_PADDSW128,
14524 IX86_BUILTIN_PADDUSB128,
14525 IX86_BUILTIN_PADDUSW128,
14526 IX86_BUILTIN_PSUBB128,
14527 IX86_BUILTIN_PSUBW128,
14528 IX86_BUILTIN_PSUBD128,
14529 IX86_BUILTIN_PSUBQ128,
14530 IX86_BUILTIN_PSUBSB128,
14531 IX86_BUILTIN_PSUBSW128,
14532 IX86_BUILTIN_PSUBUSB128,
14533 IX86_BUILTIN_PSUBUSW128,
14535 IX86_BUILTIN_PAND128,
14536 IX86_BUILTIN_PANDN128,
14537 IX86_BUILTIN_POR128,
14538 IX86_BUILTIN_PXOR128,
14540 IX86_BUILTIN_PAVGB128,
14541 IX86_BUILTIN_PAVGW128,
14543 IX86_BUILTIN_PCMPEQB128,
14544 IX86_BUILTIN_PCMPEQW128,
14545 IX86_BUILTIN_PCMPEQD128,
14546 IX86_BUILTIN_PCMPGTB128,
14547 IX86_BUILTIN_PCMPGTW128,
14548 IX86_BUILTIN_PCMPGTD128,
14550 IX86_BUILTIN_PMADDWD128,
14552 IX86_BUILTIN_PMAXSW128,
14553 IX86_BUILTIN_PMAXUB128,
14554 IX86_BUILTIN_PMINSW128,
14555 IX86_BUILTIN_PMINUB128,
14557 IX86_BUILTIN_PMULUDQ,
14558 IX86_BUILTIN_PMULUDQ128,
14559 IX86_BUILTIN_PMULHUW128,
14560 IX86_BUILTIN_PMULHW128,
14561 IX86_BUILTIN_PMULLW128,
14563 IX86_BUILTIN_PSADBW128,
14564 IX86_BUILTIN_PSHUFHW,
14565 IX86_BUILTIN_PSHUFLW,
14566 IX86_BUILTIN_PSHUFD,
14568 IX86_BUILTIN_PSLLW128,
14569 IX86_BUILTIN_PSLLD128,
14570 IX86_BUILTIN_PSLLQ128,
14571 IX86_BUILTIN_PSRAW128,
14572 IX86_BUILTIN_PSRAD128,
14573 IX86_BUILTIN_PSRLW128,
14574 IX86_BUILTIN_PSRLD128,
14575 IX86_BUILTIN_PSRLQ128,
14576 IX86_BUILTIN_PSLLDQI128,
14577 IX86_BUILTIN_PSLLWI128,
14578 IX86_BUILTIN_PSLLDI128,
14579 IX86_BUILTIN_PSLLQI128,
14580 IX86_BUILTIN_PSRAWI128,
14581 IX86_BUILTIN_PSRADI128,
14582 IX86_BUILTIN_PSRLDQI128,
14583 IX86_BUILTIN_PSRLWI128,
14584 IX86_BUILTIN_PSRLDI128,
14585 IX86_BUILTIN_PSRLQI128,
14587 IX86_BUILTIN_PUNPCKHBW128,
14588 IX86_BUILTIN_PUNPCKHWD128,
14589 IX86_BUILTIN_PUNPCKHDQ128,
14590 IX86_BUILTIN_PUNPCKHQDQ128,
14591 IX86_BUILTIN_PUNPCKLBW128,
14592 IX86_BUILTIN_PUNPCKLWD128,
14593 IX86_BUILTIN_PUNPCKLDQ128,
14594 IX86_BUILTIN_PUNPCKLQDQ128,
14596 IX86_BUILTIN_CLFLUSH,
14597 IX86_BUILTIN_MFENCE,
14598 IX86_BUILTIN_LFENCE,
14600 /* Prescott New Instructions. */
14601 IX86_BUILTIN_ADDSUBPS,
14602 IX86_BUILTIN_HADDPS,
14603 IX86_BUILTIN_HSUBPS,
14604 IX86_BUILTIN_MOVSHDUP,
14605 IX86_BUILTIN_MOVSLDUP,
14606 IX86_BUILTIN_ADDSUBPD,
14607 IX86_BUILTIN_HADDPD,
14608 IX86_BUILTIN_HSUBPD,
14609 IX86_BUILTIN_LDDQU,
14611 IX86_BUILTIN_MONITOR,
14612 IX86_BUILTIN_MWAIT,
14614 /* SSSE3. */
14615 IX86_BUILTIN_PHADDW,
14616 IX86_BUILTIN_PHADDD,
14617 IX86_BUILTIN_PHADDSW,
14618 IX86_BUILTIN_PHSUBW,
14619 IX86_BUILTIN_PHSUBD,
14620 IX86_BUILTIN_PHSUBSW,
14621 IX86_BUILTIN_PMADDUBSW,
14622 IX86_BUILTIN_PMULHRSW,
14623 IX86_BUILTIN_PSHUFB,
14624 IX86_BUILTIN_PSIGNB,
14625 IX86_BUILTIN_PSIGNW,
14626 IX86_BUILTIN_PSIGND,
14627 IX86_BUILTIN_PALIGNR,
14628 IX86_BUILTIN_PABSB,
14629 IX86_BUILTIN_PABSW,
14630 IX86_BUILTIN_PABSD,
14632 IX86_BUILTIN_PHADDW128,
14633 IX86_BUILTIN_PHADDD128,
14634 IX86_BUILTIN_PHADDSW128,
14635 IX86_BUILTIN_PHSUBW128,
14636 IX86_BUILTIN_PHSUBD128,
14637 IX86_BUILTIN_PHSUBSW128,
14638 IX86_BUILTIN_PMADDUBSW128,
14639 IX86_BUILTIN_PMULHRSW128,
14640 IX86_BUILTIN_PSHUFB128,
14641 IX86_BUILTIN_PSIGNB128,
14642 IX86_BUILTIN_PSIGNW128,
14643 IX86_BUILTIN_PSIGND128,
14644 IX86_BUILTIN_PALIGNR128,
14645 IX86_BUILTIN_PABSB128,
14646 IX86_BUILTIN_PABSW128,
14647 IX86_BUILTIN_PABSD128,
14649 IX86_BUILTIN_VEC_INIT_V2SI,
14650 IX86_BUILTIN_VEC_INIT_V4HI,
14651 IX86_BUILTIN_VEC_INIT_V8QI,
14652 IX86_BUILTIN_VEC_EXT_V2DF,
14653 IX86_BUILTIN_VEC_EXT_V2DI,
14654 IX86_BUILTIN_VEC_EXT_V4SF,
14655 IX86_BUILTIN_VEC_EXT_V4SI,
14656 IX86_BUILTIN_VEC_EXT_V8HI,
14657 IX86_BUILTIN_VEC_EXT_V2SI,
14658 IX86_BUILTIN_VEC_EXT_V4HI,
14659 IX86_BUILTIN_VEC_SET_V8HI,
14660 IX86_BUILTIN_VEC_SET_V4HI,
14662 IX86_BUILTIN_MAX
14665 #define def_builtin(MASK, NAME, TYPE, CODE) \
14666 do { \
14667 if ((MASK) & target_flags \
14668 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14669 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14670 NULL, NULL_TREE); \
14671 } while (0)
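 /* A typical use later in this file looks like the following sketch (the
    function-type node name is illustrative):

      def_builtin (MASK_SSE, "__builtin_ia32_addps",
                   v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

    so the builtin is registered only when the SSE bit is present in
    target_flags, and 64-bit-only builtins additionally require TARGET_64BIT.  */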
14673 /* Bits for builtin_description.flag. */
14675 /* Set when we don't support the comparison natively, and should
14676 swap_comparison in order to support it. */
14677 #define BUILTIN_DESC_SWAP_OPERANDS 1
14679 struct builtin_description
14681 const unsigned int mask;
14682 const enum insn_code icode;
14683 const char *const name;
14684 const enum ix86_builtins code;
14685 const enum rtx_code comparison;
14686 const unsigned int flag;
14689 static const struct builtin_description bdesc_comi[] =
14691 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14692 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14693 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14694 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14695 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14696 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14697 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14698 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14699 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14700 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14701 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14702 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14703 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14704 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14705 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14706 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14707 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14708 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14709 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14710 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14711 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14712 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14713 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14714 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14717 static const struct builtin_description bdesc_2arg[] =
14719 /* SSE */
14720 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14721 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14722 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14723 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14724 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14725 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14726 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14727 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14729 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14730 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14731 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14732 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14733 BUILTIN_DESC_SWAP_OPERANDS },
14734 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14735 BUILTIN_DESC_SWAP_OPERANDS },
14736 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14737 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14738 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14739 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14740 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14741 BUILTIN_DESC_SWAP_OPERANDS },
14742 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14743 BUILTIN_DESC_SWAP_OPERANDS },
14744 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14745 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14746 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14747 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14748 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14749 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14750 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14751 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14752 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14753 BUILTIN_DESC_SWAP_OPERANDS },
14754 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14755 BUILTIN_DESC_SWAP_OPERANDS },
14756 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14758 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14759 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14760 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14761 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14763 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14764 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14765 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14766 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14768 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14769 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14770 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14771 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14772 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14774 /* MMX */
14775 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14776 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14777 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14779 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14780 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14781 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14782 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14784 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14785 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14786 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14787 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14788 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14789 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14790 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14791 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14793 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14794 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14795 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14797 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14798 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14799 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14800 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14802 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14805 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14806 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14807 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14808 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14809 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14810 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14812 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14813 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14814 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14815 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14817 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14818 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14819 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14820 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14821 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14822 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14824 /* Special. */
14825 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14826 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14827 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14829 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14830 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14831 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14833 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14834 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14835 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14836 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14837 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14838 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14840 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14841 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14842 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14843 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14844 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14845 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14847 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14848 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14849 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14850 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14852 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14853 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14855 /* SSE2 */
14856 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14858 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14859 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14860 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14861 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14862 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14863 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14865 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14867 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14868 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14869 BUILTIN_DESC_SWAP_OPERANDS },
14870 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14871 BUILTIN_DESC_SWAP_OPERANDS },
14872 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14873 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14874 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14875 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14877 BUILTIN_DESC_SWAP_OPERANDS },
14878 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14879 BUILTIN_DESC_SWAP_OPERANDS },
14880 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14883 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14886 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14887 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14888 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14890 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14892 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14893 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14896 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14900 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14901 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14902 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14904 /* SSE2 MMX */
14905 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14906 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14907 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14908 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14909 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14910 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14911 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14912 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14914 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14915 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14916 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14917 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14918 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14919 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14920 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14921 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14923 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14924 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14926 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14927 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14928 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14929 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14931 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14932 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14934 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14935 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14936 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14937 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14938 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14939 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14941 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14942 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14943 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14944 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14946 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14947 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14948 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14949 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14950 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14951 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14952 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14953 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14955 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14956 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14957 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14959 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14960 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14962 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14963 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14965 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14966 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14967 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14969 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14970 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14971 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14973 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14974 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14976 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14978 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14979 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14980 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14981 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14983 /* SSE3 MMX */
14984 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14985 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14986 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14987 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14988 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14989 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14991 /* SSSE3 */
14992 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14993 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14994 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14995 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14996 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14997 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14998 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14999 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15000 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15001 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15002 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15003 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15004 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15005 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15006 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15007 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15008 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15009 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15010 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15011 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15012 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15013 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15014 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15015 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15018 static const struct builtin_description bdesc_1arg[] =
15020 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15021 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15023 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15024 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15025 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15027 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15028 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15029 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15030 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15031 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15032 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15037 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15039 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15040 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15042 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15043 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15044 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15045 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15046 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15048 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15050 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15051 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15052 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15053 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15055 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15056 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15057 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15059 /* SSE3 */
15060 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15061 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15063 /* SSSE3 */
15064 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15065 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15066 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15067 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15068 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15069 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
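/* Illustrative sketch, not part of the original source: each entry above
   binds a public builtin name to an insn pattern, and the loop in
   ix86_init_mmx_sse_builtins below derives its prototype from the operand
   modes.  Assuming the usual GCC vector typedefs, the SSSE3 absolute-value
   entry can be exercised from user code roughly as

       typedef char __v16qi __attribute__ ((__vector_size__ (16)));

       __v16qi
       abs_bytes (__v16qi x)
       {
         return __builtin_ia32_pabsb128 (x);
       }

   which is what tmmintrin.h's _mm_abs_epi8 builds on.  */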
15072 static void
15073 ix86_init_builtins (void)
15075 if (TARGET_MMX)
15076 ix86_init_mmx_sse_builtins ();
15079 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15080 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15081 builtins. */
15082 static void
15083 ix86_init_mmx_sse_builtins (void)
15085 const struct builtin_description * d;
15086 size_t i;
15088 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15089 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15090 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15091 tree V2DI_type_node
15092 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15093 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15094 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15095 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15096 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15097 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15098 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15100 tree pchar_type_node = build_pointer_type (char_type_node);
15101 tree pcchar_type_node = build_pointer_type (
15102 build_type_variant (char_type_node, 1, 0));
15103 tree pfloat_type_node = build_pointer_type (float_type_node);
15104 tree pcfloat_type_node = build_pointer_type (
15105 build_type_variant (float_type_node, 1, 0));
15106 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15107 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15108 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15110 /* Comparisons. */
15111 tree int_ftype_v4sf_v4sf
15112 = build_function_type_list (integer_type_node,
15113 V4SF_type_node, V4SF_type_node, NULL_TREE);
15114 tree v4si_ftype_v4sf_v4sf
15115 = build_function_type_list (V4SI_type_node,
15116 V4SF_type_node, V4SF_type_node, NULL_TREE);
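/* Reader's note, illustrative and not part of the original source: the
   *_ftype_* nodes built throughout this function are tree-level prototypes.
   Assuming the usual 16-byte vector typedefs, the two nodes above amount to

       int    f (__v4sf, __v4sf);       int_ftype_v4sf_v4sf, comiss family
       __v4si g (__v4sf, __v4sf);       v4si_ftype_v4sf_v4sf, cmpps family

   and the remaining declarations below follow the same naming scheme.  */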
15117 /* MMX/SSE/integer conversions. */
15118 tree int_ftype_v4sf
15119 = build_function_type_list (integer_type_node,
15120 V4SF_type_node, NULL_TREE);
15121 tree int64_ftype_v4sf
15122 = build_function_type_list (long_long_integer_type_node,
15123 V4SF_type_node, NULL_TREE);
15124 tree int_ftype_v8qi
15125 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15126 tree v4sf_ftype_v4sf_int
15127 = build_function_type_list (V4SF_type_node,
15128 V4SF_type_node, integer_type_node, NULL_TREE);
15129 tree v4sf_ftype_v4sf_int64
15130 = build_function_type_list (V4SF_type_node,
15131 V4SF_type_node, long_long_integer_type_node,
15132 NULL_TREE);
15133 tree v4sf_ftype_v4sf_v2si
15134 = build_function_type_list (V4SF_type_node,
15135 V4SF_type_node, V2SI_type_node, NULL_TREE);
15137 /* Miscellaneous. */
15138 tree v8qi_ftype_v4hi_v4hi
15139 = build_function_type_list (V8QI_type_node,
15140 V4HI_type_node, V4HI_type_node, NULL_TREE);
15141 tree v4hi_ftype_v2si_v2si
15142 = build_function_type_list (V4HI_type_node,
15143 V2SI_type_node, V2SI_type_node, NULL_TREE);
15144 tree v4sf_ftype_v4sf_v4sf_int
15145 = build_function_type_list (V4SF_type_node,
15146 V4SF_type_node, V4SF_type_node,
15147 integer_type_node, NULL_TREE);
15148 tree v2si_ftype_v4hi_v4hi
15149 = build_function_type_list (V2SI_type_node,
15150 V4HI_type_node, V4HI_type_node, NULL_TREE);
15151 tree v4hi_ftype_v4hi_int
15152 = build_function_type_list (V4HI_type_node,
15153 V4HI_type_node, integer_type_node, NULL_TREE);
15154 tree v4hi_ftype_v4hi_di
15155 = build_function_type_list (V4HI_type_node,
15156 V4HI_type_node, long_long_unsigned_type_node,
15157 NULL_TREE);
15158 tree v2si_ftype_v2si_di
15159 = build_function_type_list (V2SI_type_node,
15160 V2SI_type_node, long_long_unsigned_type_node,
15161 NULL_TREE);
15162 tree void_ftype_void
15163 = build_function_type (void_type_node, void_list_node);
15164 tree void_ftype_unsigned
15165 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15166 tree void_ftype_unsigned_unsigned
15167 = build_function_type_list (void_type_node, unsigned_type_node,
15168 unsigned_type_node, NULL_TREE);
15169 tree void_ftype_pcvoid_unsigned_unsigned
15170 = build_function_type_list (void_type_node, const_ptr_type_node,
15171 unsigned_type_node, unsigned_type_node,
15172 NULL_TREE);
15173 tree unsigned_ftype_void
15174 = build_function_type (unsigned_type_node, void_list_node);
15175 tree v2si_ftype_v4sf
15176 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15177 /* Loads/stores. */
15178 tree void_ftype_v8qi_v8qi_pchar
15179 = build_function_type_list (void_type_node,
15180 V8QI_type_node, V8QI_type_node,
15181 pchar_type_node, NULL_TREE);
15182 tree v4sf_ftype_pcfloat
15183 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15184 /* @@@ the type is bogus */
15185 tree v4sf_ftype_v4sf_pv2si
15186 = build_function_type_list (V4SF_type_node,
15187 V4SF_type_node, pv2si_type_node, NULL_TREE);
15188 tree void_ftype_pv2si_v4sf
15189 = build_function_type_list (void_type_node,
15190 pv2si_type_node, V4SF_type_node, NULL_TREE);
15191 tree void_ftype_pfloat_v4sf
15192 = build_function_type_list (void_type_node,
15193 pfloat_type_node, V4SF_type_node, NULL_TREE);
15194 tree void_ftype_pdi_di
15195 = build_function_type_list (void_type_node,
15196 pdi_type_node, long_long_unsigned_type_node,
15197 NULL_TREE);
15198 tree void_ftype_pv2di_v2di
15199 = build_function_type_list (void_type_node,
15200 pv2di_type_node, V2DI_type_node, NULL_TREE);
15201 /* Normal vector unops. */
15202 tree v4sf_ftype_v4sf
15203 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15204 tree v16qi_ftype_v16qi
15205 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15206 tree v8hi_ftype_v8hi
15207 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15208 tree v4si_ftype_v4si
15209 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15210 tree v8qi_ftype_v8qi
15211 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15212 tree v4hi_ftype_v4hi
15213 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15215 /* Normal vector binops. */
15216 tree v4sf_ftype_v4sf_v4sf
15217 = build_function_type_list (V4SF_type_node,
15218 V4SF_type_node, V4SF_type_node, NULL_TREE);
15219 tree v8qi_ftype_v8qi_v8qi
15220 = build_function_type_list (V8QI_type_node,
15221 V8QI_type_node, V8QI_type_node, NULL_TREE);
15222 tree v4hi_ftype_v4hi_v4hi
15223 = build_function_type_list (V4HI_type_node,
15224 V4HI_type_node, V4HI_type_node, NULL_TREE);
15225 tree v2si_ftype_v2si_v2si
15226 = build_function_type_list (V2SI_type_node,
15227 V2SI_type_node, V2SI_type_node, NULL_TREE);
15228 tree di_ftype_di_di
15229 = build_function_type_list (long_long_unsigned_type_node,
15230 long_long_unsigned_type_node,
15231 long_long_unsigned_type_node, NULL_TREE);
15233 tree di_ftype_di_di_int
15234 = build_function_type_list (long_long_unsigned_type_node,
15235 long_long_unsigned_type_node,
15236 long_long_unsigned_type_node,
15237 integer_type_node, NULL_TREE);
15239 tree v2si_ftype_v2sf
15240 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15241 tree v2sf_ftype_v2si
15242 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15243 tree v2si_ftype_v2si
15244 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15245 tree v2sf_ftype_v2sf
15246 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15247 tree v2sf_ftype_v2sf_v2sf
15248 = build_function_type_list (V2SF_type_node,
15249 V2SF_type_node, V2SF_type_node, NULL_TREE);
15250 tree v2si_ftype_v2sf_v2sf
15251 = build_function_type_list (V2SI_type_node,
15252 V2SF_type_node, V2SF_type_node, NULL_TREE);
15253 tree pint_type_node = build_pointer_type (integer_type_node);
15254 tree pdouble_type_node = build_pointer_type (double_type_node);
15255 tree pcdouble_type_node = build_pointer_type (
15256 build_type_variant (double_type_node, 1, 0));
15257 tree int_ftype_v2df_v2df
15258 = build_function_type_list (integer_type_node,
15259 V2DF_type_node, V2DF_type_node, NULL_TREE);
15261 tree void_ftype_pcvoid
15262 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15263 tree v4sf_ftype_v4si
15264 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15265 tree v4si_ftype_v4sf
15266 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15267 tree v2df_ftype_v4si
15268 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15269 tree v4si_ftype_v2df
15270 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15271 tree v2si_ftype_v2df
15272 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15273 tree v4sf_ftype_v2df
15274 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15275 tree v2df_ftype_v2si
15276 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15277 tree v2df_ftype_v4sf
15278 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15279 tree int_ftype_v2df
15280 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15281 tree int64_ftype_v2df
15282 = build_function_type_list (long_long_integer_type_node,
15283 V2DF_type_node, NULL_TREE);
15284 tree v2df_ftype_v2df_int
15285 = build_function_type_list (V2DF_type_node,
15286 V2DF_type_node, integer_type_node, NULL_TREE);
15287 tree v2df_ftype_v2df_int64
15288 = build_function_type_list (V2DF_type_node,
15289 V2DF_type_node, long_long_integer_type_node,
15290 NULL_TREE);
15291 tree v4sf_ftype_v4sf_v2df
15292 = build_function_type_list (V4SF_type_node,
15293 V4SF_type_node, V2DF_type_node, NULL_TREE);
15294 tree v2df_ftype_v2df_v4sf
15295 = build_function_type_list (V2DF_type_node,
15296 V2DF_type_node, V4SF_type_node, NULL_TREE);
15297 tree v2df_ftype_v2df_v2df_int
15298 = build_function_type_list (V2DF_type_node,
15299 V2DF_type_node, V2DF_type_node,
15300 integer_type_node,
15301 NULL_TREE);
15302 tree v2df_ftype_v2df_pcdouble
15303 = build_function_type_list (V2DF_type_node,
15304 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15305 tree void_ftype_pdouble_v2df
15306 = build_function_type_list (void_type_node,
15307 pdouble_type_node, V2DF_type_node, NULL_TREE);
15308 tree void_ftype_pint_int
15309 = build_function_type_list (void_type_node,
15310 pint_type_node, integer_type_node, NULL_TREE);
15311 tree void_ftype_v16qi_v16qi_pchar
15312 = build_function_type_list (void_type_node,
15313 V16QI_type_node, V16QI_type_node,
15314 pchar_type_node, NULL_TREE);
15315 tree v2df_ftype_pcdouble
15316 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15317 tree v2df_ftype_v2df_v2df
15318 = build_function_type_list (V2DF_type_node,
15319 V2DF_type_node, V2DF_type_node, NULL_TREE);
15320 tree v16qi_ftype_v16qi_v16qi
15321 = build_function_type_list (V16QI_type_node,
15322 V16QI_type_node, V16QI_type_node, NULL_TREE);
15323 tree v8hi_ftype_v8hi_v8hi
15324 = build_function_type_list (V8HI_type_node,
15325 V8HI_type_node, V8HI_type_node, NULL_TREE);
15326 tree v4si_ftype_v4si_v4si
15327 = build_function_type_list (V4SI_type_node,
15328 V4SI_type_node, V4SI_type_node, NULL_TREE);
15329 tree v2di_ftype_v2di_v2di
15330 = build_function_type_list (V2DI_type_node,
15331 V2DI_type_node, V2DI_type_node, NULL_TREE);
15332 tree v2di_ftype_v2df_v2df
15333 = build_function_type_list (V2DI_type_node,
15334 V2DF_type_node, V2DF_type_node, NULL_TREE);
15335 tree v2df_ftype_v2df
15336 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15337 tree v2di_ftype_v2di_int
15338 = build_function_type_list (V2DI_type_node,
15339 V2DI_type_node, integer_type_node, NULL_TREE);
15340 tree v2di_ftype_v2di_v2di_int
15341 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15342 V2DI_type_node, integer_type_node, NULL_TREE);
15343 tree v4si_ftype_v4si_int
15344 = build_function_type_list (V4SI_type_node,
15345 V4SI_type_node, integer_type_node, NULL_TREE);
15346 tree v8hi_ftype_v8hi_int
15347 = build_function_type_list (V8HI_type_node,
15348 V8HI_type_node, integer_type_node, NULL_TREE);
15349 tree v8hi_ftype_v8hi_v2di
15350 = build_function_type_list (V8HI_type_node,
15351 V8HI_type_node, V2DI_type_node, NULL_TREE);
15352 tree v4si_ftype_v4si_v2di
15353 = build_function_type_list (V4SI_type_node,
15354 V4SI_type_node, V2DI_type_node, NULL_TREE);
15355 tree v4si_ftype_v8hi_v8hi
15356 = build_function_type_list (V4SI_type_node,
15357 V8HI_type_node, V8HI_type_node, NULL_TREE);
15358 tree di_ftype_v8qi_v8qi
15359 = build_function_type_list (long_long_unsigned_type_node,
15360 V8QI_type_node, V8QI_type_node, NULL_TREE);
15361 tree di_ftype_v2si_v2si
15362 = build_function_type_list (long_long_unsigned_type_node,
15363 V2SI_type_node, V2SI_type_node, NULL_TREE);
15364 tree v2di_ftype_v16qi_v16qi
15365 = build_function_type_list (V2DI_type_node,
15366 V16QI_type_node, V16QI_type_node, NULL_TREE);
15367 tree v2di_ftype_v4si_v4si
15368 = build_function_type_list (V2DI_type_node,
15369 V4SI_type_node, V4SI_type_node, NULL_TREE);
15370 tree int_ftype_v16qi
15371 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15372 tree v16qi_ftype_pcchar
15373 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15374 tree void_ftype_pchar_v16qi
15375 = build_function_type_list (void_type_node,
15376 pchar_type_node, V16QI_type_node, NULL_TREE);
15378 tree float80_type;
15379 tree float128_type;
15380 tree ftype;
15382 /* The __float80 type. */
15383 if (TYPE_MODE (long_double_type_node) == XFmode)
15384 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15385 "__float80");
15386 else
15388 /* The __float80 type. */
15389 float80_type = make_node (REAL_TYPE);
15390 TYPE_PRECISION (float80_type) = 80;
15391 layout_type (float80_type);
15392 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15395 if (TARGET_64BIT)
15397 float128_type = make_node (REAL_TYPE);
15398 TYPE_PRECISION (float128_type) = 128;
15399 layout_type (float128_type);
15400 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
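/* Illustrative example, not part of the original source: once registered the
   names are directly usable from C, e.g.

       __float80  e = 1.0;      80-bit extended precision
       __float128 q = 2.0;      only registered when TARGET_64BIT, per above

   whether __float80 is a distinct type or an alias for long double depends
   on which branch was taken above.  */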
15403 /* Add all builtins that are more or less simple operations on two
15404 operands. */
15405 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15407 /* Use one of the operands; the target can have a different mode for
15408 mask-generating compares. */
15409 enum machine_mode mode;
15410 tree type;
15412 if (d->name == 0)
15413 continue;
15414 mode = insn_data[d->icode].operand[1].mode;
15416 switch (mode)
15418 case V16QImode:
15419 type = v16qi_ftype_v16qi_v16qi;
15420 break;
15421 case V8HImode:
15422 type = v8hi_ftype_v8hi_v8hi;
15423 break;
15424 case V4SImode:
15425 type = v4si_ftype_v4si_v4si;
15426 break;
15427 case V2DImode:
15428 type = v2di_ftype_v2di_v2di;
15429 break;
15430 case V2DFmode:
15431 type = v2df_ftype_v2df_v2df;
15432 break;
15433 case V4SFmode:
15434 type = v4sf_ftype_v4sf_v4sf;
15435 break;
15436 case V8QImode:
15437 type = v8qi_ftype_v8qi_v8qi;
15438 break;
15439 case V4HImode:
15440 type = v4hi_ftype_v4hi_v4hi;
15441 break;
15442 case V2SImode:
15443 type = v2si_ftype_v2si_v2si;
15444 break;
15445 case DImode:
15446 type = di_ftype_di_di;
15447 break;
15449 default:
15450 gcc_unreachable ();
15453 /* Override for comparisons. */
15454 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15455 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15456 type = v4si_ftype_v4sf_v4sf;
15458 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15459 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15460 type = v2di_ftype_v2df_v2df;
15462 def_builtin (d->mask, d->name, type, d->code);
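/* Illustrative sketch, not part of the original source: the comparison
   override above is what makes the packed-compare builtins return an integer
   mask vector instead of a float vector, e.g. roughly

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));
       typedef int   __v4si __attribute__ ((__vector_size__ (16)));

       __v4si
       eq_mask (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_cmpeqps (a, b);
       }

   where each lane of the result is 0 or all-ones; the xmmintrin.h wrappers
   cast the result back to the float vector type.  */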
15465 /* Add all builtins that are more or less simple operations on 1 operand. */
15466 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15468 enum machine_mode mode;
15469 tree type;
15471 if (d->name == 0)
15472 continue;
15473 mode = insn_data[d->icode].operand[1].mode;
15475 switch (mode)
15477 case V16QImode:
15478 type = v16qi_ftype_v16qi;
15479 break;
15480 case V8HImode:
15481 type = v8hi_ftype_v8hi;
15482 break;
15483 case V4SImode:
15484 type = v4si_ftype_v4si;
15485 break;
15486 case V2DFmode:
15487 type = v2df_ftype_v2df;
15488 break;
15489 case V4SFmode:
15490 type = v4sf_ftype_v4sf;
15491 break;
15492 case V8QImode:
15493 type = v8qi_ftype_v8qi;
15494 break;
15495 case V4HImode:
15496 type = v4hi_ftype_v4hi;
15497 break;
15498 case V2SImode:
15499 type = v2si_ftype_v2si;
15500 break;
15502 default:
15503 abort ();
15506 def_builtin (d->mask, d->name, type, d->code);
15509 /* Add the remaining MMX insns with somewhat more complicated types. */
15510 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15511 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15512 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15513 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15515 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15516 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15517 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15519 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15520 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15522 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15523 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15525 /* comi/ucomi insns. */
15526 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15527 if (d->mask == MASK_SSE2)
15528 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15529 else
15530 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
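/* Illustrative sketch, not part of the original source: every comi/ucomi
   builtin registered by this loop is an int-valued predicate on two vectors,
   e.g. roughly

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       int
       low_equal (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_comieq (a, b);
       }

   which returns 1 when the low elements compare equal and 0 otherwise; the
   SSE2 entries take __v2df operands instead.  */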
15532 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15533 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15534 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15536 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15537 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15538 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15539 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15540 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15541 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15542 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15543 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15544 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15545 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15546 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15548 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15550 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15551 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15553 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15554 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15555 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15556 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15558 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15559 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15560 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15561 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15563 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15565 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15567 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15568 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15569 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15570 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15571 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15572 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15574 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15576 /* Original 3DNow! */
15577 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15578 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15579 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15583 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15584 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15585 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15586 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15587 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15588 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15589 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15590 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15591 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15592 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15593 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15594 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15595 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15596 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15598 /* 3DNow! extension as used in the Athlon CPU. */
15599 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15600 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15601 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15602 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15603 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15604 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15606 /* SSE2 */
15607 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15609 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15610 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15612 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15613 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15615 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15616 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15617 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15618 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15619 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15621 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15622 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15623 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15624 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15626 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15627 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15629 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15631 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15632 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15634 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15635 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15636 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15637 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15638 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15640 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15642 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15643 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15644 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15645 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15647 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15648 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15649 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15651 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15652 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15653 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15654 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15656 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15657 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15658 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15660 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15661 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15663 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15664 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15666 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15667 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15668 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15670 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15671 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15672 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15674 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15675 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15677 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15678 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15679 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15680 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15682 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15683 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15684 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15685 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15687 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15688 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15690 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15692 /* Prescott New Instructions. */
15693 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15694 void_ftype_pcvoid_unsigned_unsigned,
15695 IX86_BUILTIN_MONITOR);
15696 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15697 void_ftype_unsigned_unsigned,
15698 IX86_BUILTIN_MWAIT);
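/* Illustrative example, not part of the original source: these two builtins
   back pmmintrin.h's _mm_monitor and _mm_mwait, e.g. roughly

       void
       wait_for_store (void const *p)
       {
         __builtin_ia32_monitor (p, 0, 0);
         __builtin_ia32_mwait (0, 0);
       }

   with the extension and hint arguments passed through unchanged.  */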
15699 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15700 v4sf_ftype_v4sf,
15701 IX86_BUILTIN_MOVSHDUP);
15702 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15703 v4sf_ftype_v4sf,
15704 IX86_BUILTIN_MOVSLDUP);
15705 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15706 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15708 /* SSSE3. */
15709 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15710 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15711 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15712 IX86_BUILTIN_PALIGNR);
15714 /* Access to the vec_init patterns. */
15715 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15716 integer_type_node, NULL_TREE);
15717 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15718 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15720 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15721 short_integer_type_node,
15722 short_integer_type_node,
15723 short_integer_type_node, NULL_TREE);
15724 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15725 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15727 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15728 char_type_node, char_type_node,
15729 char_type_node, char_type_node,
15730 char_type_node, char_type_node,
15731 char_type_node, NULL_TREE);
15732 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15733 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
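/* Illustrative sketch, not part of the original source: the vec_init builtins
   are what mmintrin.h's _mm_set_* and _mm_setr_* wrappers build on, e.g.
   roughly

       typedef int __v2si __attribute__ ((__vector_size__ (8)));

       __v2si
       make_pair (int lo, int hi)
       {
         return __builtin_ia32_vec_init_v2si (lo, hi);
       }

   with the first argument landing in element 0; MMX must be enabled.  */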
15735 /* Access to the vec_extract patterns. */
15736 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15737 integer_type_node, NULL_TREE);
15738 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15739 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15741 ftype = build_function_type_list (long_long_integer_type_node,
15742 V2DI_type_node, integer_type_node,
15743 NULL_TREE);
15744 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15745 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15747 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15748 integer_type_node, NULL_TREE);
15749 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15750 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15752 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15753 integer_type_node, NULL_TREE);
15754 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15755 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15757 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15758 integer_type_node, NULL_TREE);
15759 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15760 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15762 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15763 integer_type_node, NULL_TREE);
15764 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15765 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15767 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15768 integer_type_node, NULL_TREE);
15769 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15770 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
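/* Illustrative sketch, not part of the original source: emmintrin.h's
   _mm_extract_epi16 is a thin wrapper over the V8HI extract builtin, e.g.
   roughly

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       int
       third_lane (__v8hi x)
       {
         return (unsigned short) __builtin_ia32_vec_ext_v8hi (x, 3);
       }

   the selector must be an integer constant in range; see get_element_number
   further down.  */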
15772 /* Access to the vec_set patterns. */
15773 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15774 intHI_type_node,
15775 integer_type_node, NULL_TREE);
15776 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15777 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15779 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15780 intHI_type_node,
15781 integer_type_node, NULL_TREE);
15782 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15783 ftype, IX86_BUILTIN_VEC_SET_V4HI);
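/* Illustrative sketch, not part of the original source: the matching insert
   operation, exposed by emmintrin.h as _mm_insert_epi16, is roughly

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       __v8hi
       set_lane0 (__v8hi x, int v)
       {
         return __builtin_ia32_vec_set_v8hi (x, v, 0);
       }

   again with a constant in-range selector.  */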
15786 /* Errors in the source file can cause expand_expr to return const0_rtx
15787 where we expect a vector. To avoid crashing, use one of the vector
15788 clear instructions. */
15789 static rtx
15790 safe_vector_operand (rtx x, enum machine_mode mode)
15792 if (x == const0_rtx)
15793 x = CONST0_RTX (mode);
15794 return x;
15797 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15799 static rtx
15800 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15802 rtx pat, xops[3];
15803 tree arg0 = TREE_VALUE (arglist);
15804 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15805 rtx op0 = expand_normal (arg0);
15806 rtx op1 = expand_normal (arg1);
15807 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15808 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15809 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15811 if (VECTOR_MODE_P (mode0))
15812 op0 = safe_vector_operand (op0, mode0);
15813 if (VECTOR_MODE_P (mode1))
15814 op1 = safe_vector_operand (op1, mode1);
15816 if (optimize || !target
15817 || GET_MODE (target) != tmode
15818 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15819 target = gen_reg_rtx (tmode);
15821 if (GET_MODE (op1) == SImode && mode1 == TImode)
15823 rtx x = gen_reg_rtx (V4SImode);
15824 emit_insn (gen_sse2_loadd (x, op1));
15825 op1 = gen_lowpart (TImode, x);
15828 /* The insn must want input operands in the same modes as the
15829 result. */
15830 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15831 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15833 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15834 op0 = copy_to_mode_reg (mode0, op0);
15835 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15836 op1 = copy_to_mode_reg (mode1, op1);
15838 /* ??? Using ix86_fixup_binary_operands is problematic when
15839 we've got mismatched modes. Fake it. */
15841 xops[0] = target;
15842 xops[1] = op0;
15843 xops[2] = op1;
15845 if (tmode == mode0 && tmode == mode1)
15847 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15848 op0 = xops[1];
15849 op1 = xops[2];
15851 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15853 op0 = force_reg (mode0, op0);
15854 op1 = force_reg (mode1, op1);
15855 target = gen_reg_rtx (tmode);
15858 pat = GEN_FCN (icode) (target, op0, op1);
15859 if (! pat)
15860 return 0;
15861 emit_insn (pat);
15862 return target;
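/* Illustrative sketch, not part of the original source: this helper expands
   every named entry of bdesc_2arg, so a source-level call such as

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       __v8hi
       saturating_add (__v8hi a, __v8hi b)
       {
         return __builtin_ia32_paddsw128 (a, b);
       }

   goes through it; the SImode/TImode fixup above covers the entries whose
   last argument is a plain int at the source level while the insn wants a
   TImode operand, by loading the value into an SSE register first.  */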
15865 /* Subroutine of ix86_expand_builtin to take care of stores. */
15867 static rtx
15868 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15870 rtx pat;
15871 tree arg0 = TREE_VALUE (arglist);
15872 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15873 rtx op0 = expand_normal (arg0);
15874 rtx op1 = expand_normal (arg1);
15875 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15876 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15878 if (VECTOR_MODE_P (mode1))
15879 op1 = safe_vector_operand (op1, mode1);
15881 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15882 op1 = copy_to_mode_reg (mode1, op1);
15884 pat = GEN_FCN (icode) (op0, op1);
15885 if (pat)
15886 emit_insn (pat);
15887 return 0;
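/* Illustrative example, not part of the original source: a typical entry
   expanded here is __builtin_ia32_storeups, which underlies _mm_storeu_ps,
   e.g. roughly

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       void
       store_unaligned (float *p, __v4sf v)
       {
         __builtin_ia32_storeups (p, v);
       }

   the first argument is turned into the MEM operand above.  */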
15890 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15892 static rtx
15893 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15894 rtx target, int do_load)
15896 rtx pat;
15897 tree arg0 = TREE_VALUE (arglist);
15898 rtx op0 = expand_normal (arg0);
15899 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15900 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15902 if (optimize || !target
15903 || GET_MODE (target) != tmode
15904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15905 target = gen_reg_rtx (tmode);
15906 if (do_load)
15907 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15908 else
15910 if (VECTOR_MODE_P (mode0))
15911 op0 = safe_vector_operand (op0, mode0);
15913 if ((optimize && !register_operand (op0, mode0))
15914 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15915 op0 = copy_to_mode_reg (mode0, op0);
15918 pat = GEN_FCN (icode) (target, op0);
15919 if (! pat)
15920 return 0;
15921 emit_insn (pat);
15922 return target;
15925 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15926 sqrtss, rsqrtss, rcpss. */
15928 static rtx
15929 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15931 rtx pat;
15932 tree arg0 = TREE_VALUE (arglist);
15933 rtx op1, op0 = expand_normal (arg0);
15934 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15935 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15937 if (optimize || !target
15938 || GET_MODE (target) != tmode
15939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15940 target = gen_reg_rtx (tmode);
15942 if (VECTOR_MODE_P (mode0))
15943 op0 = safe_vector_operand (op0, mode0);
15945 if ((optimize && !register_operand (op0, mode0))
15946 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15947 op0 = copy_to_mode_reg (mode0, op0);
15949 op1 = op0;
15950 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15951 op1 = copy_to_mode_reg (mode0, op1);
15953 pat = GEN_FCN (icode) (target, op0, op1);
15954 if (! pat)
15955 return 0;
15956 emit_insn (pat);
15957 return target;
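/* Illustrative sketch, not part of the original source: because op1 is a copy
   of op0, the upper lanes of the result come from the input, e.g. roughly

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       low_sqrt (__v4sf x)
       {
         return __builtin_ia32_sqrtss (x);
       }

   yields sqrt of x[0] in lane 0 and x[1..3] unchanged in the upper lanes,
   which is the behaviour xmmintrin.h exposes as _mm_sqrt_ss.  */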
15960 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15962 static rtx
15963 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15964 rtx target)
15966 rtx pat;
15967 tree arg0 = TREE_VALUE (arglist);
15968 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15969 rtx op0 = expand_normal (arg0);
15970 rtx op1 = expand_normal (arg1);
15971 rtx op2;
15972 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15973 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15974 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15975 enum rtx_code comparison = d->comparison;
15977 if (VECTOR_MODE_P (mode0))
15978 op0 = safe_vector_operand (op0, mode0);
15979 if (VECTOR_MODE_P (mode1))
15980 op1 = safe_vector_operand (op1, mode1);
15982 /* Swap operands if we have a comparison that isn't available in
15983 hardware. */
15984 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15986 rtx tmp = gen_reg_rtx (mode1);
15987 emit_move_insn (tmp, op1);
15988 op1 = op0;
15989 op0 = tmp;
15992 if (optimize || !target
15993 || GET_MODE (target) != tmode
15994 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15995 target = gen_reg_rtx (tmode);
15997 if ((optimize && !register_operand (op0, mode0))
15998 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15999 op0 = copy_to_mode_reg (mode0, op0);
16000 if ((optimize && !register_operand (op1, mode1))
16001 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16002 op1 = copy_to_mode_reg (mode1, op1);
16004 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16005 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16006 if (! pat)
16007 return 0;
16008 emit_insn (pat);
16009 return target;
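/* Illustrative note, not part of the original source: the operand swap above
   is how predicates missing from the hardware are synthesized; for instance
   a greater-than builtin tabled with BUILTIN_DESC_SWAP_OPERANDS, e.g.

       __v4si m = __builtin_ia32_cmpgtps (a, b);

   is emitted as the corresponding less-than comparison with a and b
   exchanged (a and b being __v4sf values).  */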
16012 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16014 static rtx
16015 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16016 rtx target)
16018 rtx pat;
16019 tree arg0 = TREE_VALUE (arglist);
16020 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16021 rtx op0 = expand_normal (arg0);
16022 rtx op1 = expand_normal (arg1);
16023 rtx op2;
16024 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16025 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16026 enum rtx_code comparison = d->comparison;
16028 if (VECTOR_MODE_P (mode0))
16029 op0 = safe_vector_operand (op0, mode0);
16030 if (VECTOR_MODE_P (mode1))
16031 op1 = safe_vector_operand (op1, mode1);
16033 /* Swap operands if we have a comparison that isn't available in
16034 hardware. */
16035 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16037 rtx tmp = op1;
16038 op1 = op0;
16039 op0 = tmp;
16042 target = gen_reg_rtx (SImode);
16043 emit_move_insn (target, const0_rtx);
16044 target = gen_rtx_SUBREG (QImode, target, 0);
16046 if ((optimize && !register_operand (op0, mode0))
16047 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16048 op0 = copy_to_mode_reg (mode0, op0);
16049 if ((optimize && !register_operand (op1, mode1))
16050 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16051 op1 = copy_to_mode_reg (mode1, op1);
16053 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16054 pat = GEN_FCN (d->icode) (op0, op1);
16055 if (! pat)
16056 return 0;
16057 emit_insn (pat);
16058 emit_insn (gen_rtx_SET (VOIDmode,
16059 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16060 gen_rtx_fmt_ee (comparison, QImode,
16061 SET_DEST (pat),
16062 const0_rtx)));
16064 return SUBREG_REG (target);
16067 /* Return the integer constant in ARG. Constrain it to be in the range
16068 of the subparts of VEC_TYPE; issue an error if not. */
16070 static int
16071 get_element_number (tree vec_type, tree arg)
16073 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16075 if (!host_integerp (arg, 1)
16076 || (elt = tree_low_cst (arg, 1), elt > max))
16078 error ("selector must be an integer constant in the range 0..%wi", max);
16079 return 0;
16082 return elt;
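/* Illustrative example, not part of the original source: with a four-element
   vector the valid selectors are 0..3, so a call like

       __builtin_ia32_vec_ext_v4sf (x, 7);

   is diagnosed with the error above and element 0 is used instead, letting
   expansion continue after the error.  */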
16085 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16086 ix86_expand_vector_init. We DO have language-level syntax for this, in
16087 the form of (type){ init-list }. Except that since we can't place emms
16088 instructions from inside the compiler, we can't allow the use of MMX
16089 registers unless the user explicitly asks for it. So we do *not* define
16090 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16092 we have builtins invoked by mmintrin.h that give us license to emit
16092 these sorts of instructions. */
16094 static rtx
16095 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16097 enum machine_mode tmode = TYPE_MODE (type);
16098 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16099 int i, n_elt = GET_MODE_NUNITS (tmode);
16100 rtvec v = rtvec_alloc (n_elt);
16102 gcc_assert (VECTOR_MODE_P (tmode));
16104 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16106 rtx x = expand_normal (TREE_VALUE (arglist));
16107 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16110 gcc_assert (arglist == NULL);
16112 if (!target || !register_operand (target, tmode))
16113 target = gen_reg_rtx (tmode);
16115 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16116 return target;
16119 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16120 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16121 had a language-level syntax for referencing vector elements. */
16123 static rtx
16124 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16126 enum machine_mode tmode, mode0;
16127 tree arg0, arg1;
16128 int elt;
16129 rtx op0;
16131 arg0 = TREE_VALUE (arglist);
16132 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16134 op0 = expand_normal (arg0);
16135 elt = get_element_number (TREE_TYPE (arg0), arg1);
16137 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16138 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16139 gcc_assert (VECTOR_MODE_P (mode0));
16141 op0 = force_reg (mode0, op0);
16143 if (optimize || !target || !register_operand (target, tmode))
16144 target = gen_reg_rtx (tmode);
16146 ix86_expand_vector_extract (true, target, op0, elt);
16148 return target;
16151 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16152 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16153 a language-level syntax for referencing vector elements. */
16155 static rtx
16156 ix86_expand_vec_set_builtin (tree arglist)
16158 enum machine_mode tmode, mode1;
16159 tree arg0, arg1, arg2;
16160 int elt;
16161 rtx op0, op1;
16163 arg0 = TREE_VALUE (arglist);
16164 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16165 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16167 tmode = TYPE_MODE (TREE_TYPE (arg0));
16168 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16169 gcc_assert (VECTOR_MODE_P (tmode));
16171 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16172 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16173 elt = get_element_number (TREE_TYPE (arg0), arg2);
16175 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16176 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16178 op0 = force_reg (tmode, op0);
16179 op1 = force_reg (mode1, op1);
16181 ix86_expand_vector_set (true, op0, op1, elt);
16183 return op0;
16186 /* Expand an expression EXP that calls a built-in function,
16187 with result going to TARGET if that's convenient
16188 (and in mode MODE if that's convenient).
16189 SUBTARGET may be used as the target for computing one of EXP's operands.
16190 IGNORE is nonzero if the value is to be ignored. */
16192 static rtx
16193 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16194 enum machine_mode mode ATTRIBUTE_UNUSED,
16195 int ignore ATTRIBUTE_UNUSED)
16197 const struct builtin_description *d;
16198 size_t i;
16199 enum insn_code icode;
16200 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16201 tree arglist = TREE_OPERAND (exp, 1);
16202 tree arg0, arg1, arg2;
16203 rtx op0, op1, op2, pat;
16204 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16205 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16207 switch (fcode)
16209 case IX86_BUILTIN_EMMS:
16210 emit_insn (gen_mmx_emms ());
16211 return 0;
16213 case IX86_BUILTIN_SFENCE:
16214 emit_insn (gen_sse_sfence ());
16215 return 0;
16217 case IX86_BUILTIN_MASKMOVQ:
16218 case IX86_BUILTIN_MASKMOVDQU:
16219 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16220 ? CODE_FOR_mmx_maskmovq
16221 : CODE_FOR_sse2_maskmovdqu);
16222 /* Note the arg order is different from the operand order. */
16223 arg1 = TREE_VALUE (arglist);
16224 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16225 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16226 op0 = expand_normal (arg0);
16227 op1 = expand_normal (arg1);
16228 op2 = expand_normal (arg2);
16229 mode0 = insn_data[icode].operand[0].mode;
16230 mode1 = insn_data[icode].operand[1].mode;
16231 mode2 = insn_data[icode].operand[2].mode;
16233 op0 = force_reg (Pmode, op0);
16234 op0 = gen_rtx_MEM (mode1, op0);
16236 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16237 op0 = copy_to_mode_reg (mode0, op0);
16238 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16239 op1 = copy_to_mode_reg (mode1, op1);
16240 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16241 op2 = copy_to_mode_reg (mode2, op2);
16242 pat = GEN_FCN (icode) (op0, op1, op2);
16243 if (! pat)
16244 return 0;
16245 emit_insn (pat);
16246 return 0;
16248 case IX86_BUILTIN_SQRTSS:
16249 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16250 case IX86_BUILTIN_RSQRTSS:
16251 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16252 case IX86_BUILTIN_RCPSS:
16253 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16255 case IX86_BUILTIN_LOADUPS:
16256 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16258 case IX86_BUILTIN_STOREUPS:
16259 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16261 case IX86_BUILTIN_LOADHPS:
16262 case IX86_BUILTIN_LOADLPS:
16263 case IX86_BUILTIN_LOADHPD:
16264 case IX86_BUILTIN_LOADLPD:
16265 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16266 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16267 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16268 : CODE_FOR_sse2_loadlpd);
16269 arg0 = TREE_VALUE (arglist);
16270 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16271 op0 = expand_normal (arg0);
16272 op1 = expand_normal (arg1);
16273 tmode = insn_data[icode].operand[0].mode;
16274 mode0 = insn_data[icode].operand[1].mode;
16275 mode1 = insn_data[icode].operand[2].mode;
16277 op0 = force_reg (mode0, op0);
16278 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16279 if (optimize || target == 0
16280 || GET_MODE (target) != tmode
16281 || !register_operand (target, tmode))
16282 target = gen_reg_rtx (tmode);
16283 pat = GEN_FCN (icode) (target, op0, op1);
16284 if (! pat)
16285 return 0;
16286 emit_insn (pat);
16287 return target;
16289 case IX86_BUILTIN_STOREHPS:
16290 case IX86_BUILTIN_STORELPS:
16291 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16292 : CODE_FOR_sse_storelps);
16293 arg0 = TREE_VALUE (arglist);
16294 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16295 op0 = expand_normal (arg0);
16296 op1 = expand_normal (arg1);
16297 mode0 = insn_data[icode].operand[0].mode;
16298 mode1 = insn_data[icode].operand[1].mode;
16300 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16301 op1 = force_reg (mode1, op1);
16303 pat = GEN_FCN (icode) (op0, op1);
16304 if (! pat)
16305 return 0;
16306 emit_insn (pat);
16307 return const0_rtx;
16309 case IX86_BUILTIN_MOVNTPS:
16310 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16311 case IX86_BUILTIN_MOVNTQ:
16312 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16314 case IX86_BUILTIN_LDMXCSR:
16315 op0 = expand_normal (TREE_VALUE (arglist));
16316 target = assign_386_stack_local (SImode, SLOT_TEMP);
16317 emit_move_insn (target, op0);
16318 emit_insn (gen_sse_ldmxcsr (target));
16319 return 0;
16321 case IX86_BUILTIN_STMXCSR:
16322 target = assign_386_stack_local (SImode, SLOT_TEMP);
16323 emit_insn (gen_sse_stmxcsr (target));
16324 return copy_to_mode_reg (SImode, target);
16326 case IX86_BUILTIN_SHUFPS:
16327 case IX86_BUILTIN_SHUFPD:
16328 icode = (fcode == IX86_BUILTIN_SHUFPS
16329 ? CODE_FOR_sse_shufps
16330 : CODE_FOR_sse2_shufpd);
16331 arg0 = TREE_VALUE (arglist);
16332 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16333 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16334 op0 = expand_normal (arg0);
16335 op1 = expand_normal (arg1);
16336 op2 = expand_normal (arg2);
16337 tmode = insn_data[icode].operand[0].mode;
16338 mode0 = insn_data[icode].operand[1].mode;
16339 mode1 = insn_data[icode].operand[2].mode;
16340 mode2 = insn_data[icode].operand[3].mode;
16342 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16343 op0 = copy_to_mode_reg (mode0, op0);
16344 if ((optimize && !register_operand (op1, mode1))
16345 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16346 op1 = copy_to_mode_reg (mode1, op1);
16347 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16349 /* @@@ better error message */
16350 error ("mask must be an immediate");
16351 return gen_reg_rtx (tmode);
16353 if (optimize || target == 0
16354 || GET_MODE (target) != tmode
16355 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16356 target = gen_reg_rtx (tmode);
16357 pat = GEN_FCN (icode) (target, op0, op1, op2);
16358 if (! pat)
16359 return 0;
16360 emit_insn (pat);
16361 return target;
16363 case IX86_BUILTIN_PSHUFW:
16364 case IX86_BUILTIN_PSHUFD:
16365 case IX86_BUILTIN_PSHUFHW:
16366 case IX86_BUILTIN_PSHUFLW:
16367 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16368 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16369 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16370 : CODE_FOR_mmx_pshufw);
16371 arg0 = TREE_VALUE (arglist);
16372 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16373 op0 = expand_normal (arg0);
16374 op1 = expand_normal (arg1);
16375 tmode = insn_data[icode].operand[0].mode;
16376 mode1 = insn_data[icode].operand[1].mode;
16377 mode2 = insn_data[icode].operand[2].mode;
16379 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16380 op0 = copy_to_mode_reg (mode1, op0);
16381 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16383 /* @@@ better error message */
16384 error ("mask must be an immediate");
16385 return const0_rtx;
16387 if (target == 0
16388 || GET_MODE (target) != tmode
16389 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16390 target = gen_reg_rtx (tmode);
16391 pat = GEN_FCN (icode) (target, op0, op1);
16392 if (! pat)
16393 return 0;
16394 emit_insn (pat);
16395 return target;
16397 case IX86_BUILTIN_PSLLDQI128:
16398 case IX86_BUILTIN_PSRLDQI128:
16399 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16400 : CODE_FOR_sse2_lshrti3);
16401 arg0 = TREE_VALUE (arglist);
16402 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16403 op0 = expand_normal (arg0);
16404 op1 = expand_normal (arg1);
16405 tmode = insn_data[icode].operand[0].mode;
16406 mode1 = insn_data[icode].operand[1].mode;
16407 mode2 = insn_data[icode].operand[2].mode;
16409 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16411 op0 = copy_to_reg (op0);
16412 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16414 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16416 error ("shift must be an immediate");
16417 return const0_rtx;
16419 target = gen_reg_rtx (V2DImode);
16420 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16421 if (! pat)
16422 return 0;
16423 emit_insn (pat);
16424 return target;
16426 case IX86_BUILTIN_FEMMS:
16427 emit_insn (gen_mmx_femms ());
16428 return NULL_RTX;
16430 case IX86_BUILTIN_PAVGUSB:
16431 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16433 case IX86_BUILTIN_PF2ID:
16434 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16436 case IX86_BUILTIN_PFACC:
16437 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16439 case IX86_BUILTIN_PFADD:
16440 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16442 case IX86_BUILTIN_PFCMPEQ:
16443 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16445 case IX86_BUILTIN_PFCMPGE:
16446 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16448 case IX86_BUILTIN_PFCMPGT:
16449 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16451 case IX86_BUILTIN_PFMAX:
16452 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16454 case IX86_BUILTIN_PFMIN:
16455 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16457 case IX86_BUILTIN_PFMUL:
16458 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16460 case IX86_BUILTIN_PFRCP:
16461 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16463 case IX86_BUILTIN_PFRCPIT1:
16464 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16466 case IX86_BUILTIN_PFRCPIT2:
16467 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16469 case IX86_BUILTIN_PFRSQIT1:
16470 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16472 case IX86_BUILTIN_PFRSQRT:
16473 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16475 case IX86_BUILTIN_PFSUB:
16476 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16478 case IX86_BUILTIN_PFSUBR:
16479 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16481 case IX86_BUILTIN_PI2FD:
16482 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16484 case IX86_BUILTIN_PMULHRW:
16485 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16487 case IX86_BUILTIN_PF2IW:
16488 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16490 case IX86_BUILTIN_PFNACC:
16491 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16493 case IX86_BUILTIN_PFPNACC:
16494 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16496 case IX86_BUILTIN_PI2FW:
16497 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16499 case IX86_BUILTIN_PSWAPDSI:
16500 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16502 case IX86_BUILTIN_PSWAPDSF:
16503 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16505 case IX86_BUILTIN_SQRTSD:
16506 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16507 case IX86_BUILTIN_LOADUPD:
16508 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16509 case IX86_BUILTIN_STOREUPD:
16510 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16512 case IX86_BUILTIN_MFENCE:
16513 emit_insn (gen_sse2_mfence ());
16514 return 0;
16515 case IX86_BUILTIN_LFENCE:
16516 emit_insn (gen_sse2_lfence ());
16517 return 0;
16519 case IX86_BUILTIN_CLFLUSH:
16520 arg0 = TREE_VALUE (arglist);
16521 op0 = expand_normal (arg0);
16522 icode = CODE_FOR_sse2_clflush;
16523 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16524 op0 = copy_to_mode_reg (Pmode, op0);
16526 emit_insn (gen_sse2_clflush (op0));
16527 return 0;
16529 case IX86_BUILTIN_MOVNTPD:
16530 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16531 case IX86_BUILTIN_MOVNTDQ:
16532 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16533 case IX86_BUILTIN_MOVNTI:
16534 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16536 case IX86_BUILTIN_LOADDQU:
16537 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16538 case IX86_BUILTIN_STOREDQU:
16539 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16541 case IX86_BUILTIN_MONITOR:
16542 arg0 = TREE_VALUE (arglist);
16543 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16544 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16545 op0 = expand_normal (arg0);
16546 op1 = expand_normal (arg1);
16547 op2 = expand_normal (arg2);
16548 if (!REG_P (op0))
16549 op0 = copy_to_mode_reg (Pmode, op0);
16550 if (!REG_P (op1))
16551 op1 = copy_to_mode_reg (SImode, op1);
16552 if (!REG_P (op2))
16553 op2 = copy_to_mode_reg (SImode, op2);
16554 if (!TARGET_64BIT)
16555 emit_insn (gen_sse3_monitor (op0, op1, op2));
16556 else
16557 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16558 return 0;
16560 case IX86_BUILTIN_MWAIT:
16561 arg0 = TREE_VALUE (arglist);
16562 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16563 op0 = expand_normal (arg0);
16564 op1 = expand_normal (arg1);
16565 if (!REG_P (op0))
16566 op0 = copy_to_mode_reg (SImode, op0);
16567 if (!REG_P (op1))
16568 op1 = copy_to_mode_reg (SImode, op1);
16569 emit_insn (gen_sse3_mwait (op0, op1));
16570 return 0;
16572 case IX86_BUILTIN_LDDQU:
16573 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16574 target, 1);
16576 case IX86_BUILTIN_PALIGNR:
16577 case IX86_BUILTIN_PALIGNR128:
16578 if (fcode == IX86_BUILTIN_PALIGNR)
16580 icode = CODE_FOR_ssse3_palignrdi;
16581 mode = DImode;
16583 else
16585 icode = CODE_FOR_ssse3_palignrti;
16586 mode = V2DImode;
16588 arg0 = TREE_VALUE (arglist);
16589 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16590 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16591 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16592 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16593 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16594 tmode = insn_data[icode].operand[0].mode;
16595 mode1 = insn_data[icode].operand[1].mode;
16596 mode2 = insn_data[icode].operand[2].mode;
16597 mode3 = insn_data[icode].operand[3].mode;
16599 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16601 op0 = copy_to_reg (op0);
16602 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16604 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16606 op1 = copy_to_reg (op1);
16607 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16609 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16611 error ("shift must be an immediate");
16612 return const0_rtx;
16614 target = gen_reg_rtx (mode);
16615 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16616 op0, op1, op2);
16617 if (! pat)
16618 return 0;
16619 emit_insn (pat);
16620 return target;
16622 case IX86_BUILTIN_VEC_INIT_V2SI:
16623 case IX86_BUILTIN_VEC_INIT_V4HI:
16624 case IX86_BUILTIN_VEC_INIT_V8QI:
16625 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16627 case IX86_BUILTIN_VEC_EXT_V2DF:
16628 case IX86_BUILTIN_VEC_EXT_V2DI:
16629 case IX86_BUILTIN_VEC_EXT_V4SF:
16630 case IX86_BUILTIN_VEC_EXT_V4SI:
16631 case IX86_BUILTIN_VEC_EXT_V8HI:
16632 case IX86_BUILTIN_VEC_EXT_V2SI:
16633 case IX86_BUILTIN_VEC_EXT_V4HI:
16634 return ix86_expand_vec_ext_builtin (arglist, target);
16636 case IX86_BUILTIN_VEC_SET_V8HI:
16637 case IX86_BUILTIN_VEC_SET_V4HI:
16638 return ix86_expand_vec_set_builtin (arglist);
16640 default:
16641 break;
16644 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16645 if (d->code == fcode)
16647 /* Compares are treated specially. */
16648 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16649 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16650 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16651 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16652 return ix86_expand_sse_compare (d, arglist, target);
16654 return ix86_expand_binop_builtin (d->icode, arglist, target);
16657 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16658 if (d->code == fcode)
16659 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16661 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16662 if (d->code == fcode)
16663 return ix86_expand_sse_comi (d, arglist, target);
16665 gcc_unreachable ();
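/* Editor's note (illustrative sketch, not part of the original source): most
   two-operand SSE builtins are not listed in the switch above; they fall
   through to the bdesc_2arg loop instead.  Assuming the usual mapping of
   _mm_add_ps in <xmmintrin.h> to __builtin_ia32_addps, the common path is:  */
#if 0
#include <xmmintrin.h>

__m128
add_vectors (__m128 a, __m128 b)
{
  /* __builtin_ia32_addps is found in the bdesc_2arg table and expanded by
     ix86_expand_binop_builtin into a single addps.  */
  return _mm_add_ps (a, b);
}
#endif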
16668 /* Store OPERAND to memory after reload is completed. This means
16669 that we can't easily use assign_stack_local. */
16671 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16673 rtx result;
16675 gcc_assert (reload_completed);
16676 if (TARGET_RED_ZONE)
16678 result = gen_rtx_MEM (mode,
16679 gen_rtx_PLUS (Pmode,
16680 stack_pointer_rtx,
16681 GEN_INT (-RED_ZONE_SIZE)));
16682 emit_move_insn (result, operand);
16684 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16686 switch (mode)
16688 case HImode:
16689 case SImode:
16690 operand = gen_lowpart (DImode, operand);
16691 /* FALLTHRU */
16692 case DImode:
16693 emit_insn (
16694 gen_rtx_SET (VOIDmode,
16695 gen_rtx_MEM (DImode,
16696 gen_rtx_PRE_DEC (DImode,
16697 stack_pointer_rtx)),
16698 operand));
16699 break;
16700 default:
16701 gcc_unreachable ();
16703 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16705 else
16707 switch (mode)
16709 case DImode:
16711 rtx operands[2];
16712 split_di (&operand, 1, operands, operands + 1);
16713 emit_insn (
16714 gen_rtx_SET (VOIDmode,
16715 gen_rtx_MEM (SImode,
16716 gen_rtx_PRE_DEC (Pmode,
16717 stack_pointer_rtx)),
16718 operands[1]));
16719 emit_insn (
16720 gen_rtx_SET (VOIDmode,
16721 gen_rtx_MEM (SImode,
16722 gen_rtx_PRE_DEC (Pmode,
16723 stack_pointer_rtx)),
16724 operands[0]));
16726 break;
16727 case HImode:
16728 /* Store HImodes as SImodes. */
16729 operand = gen_lowpart (SImode, operand);
16730 /* FALLTHRU */
16731 case SImode:
16732 emit_insn (
16733 gen_rtx_SET (VOIDmode,
16734 gen_rtx_MEM (GET_MODE (operand),
16735 gen_rtx_PRE_DEC (SImode,
16736 stack_pointer_rtx)),
16737 operand));
16738 break;
16739 default:
16740 gcc_unreachable ();
16742 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16744 return result;
16747 /* Free the operand from memory. */
16748 void
16749 ix86_free_from_memory (enum machine_mode mode)
16751 if (!TARGET_RED_ZONE)
16753 int size;
16755 if (mode == DImode || TARGET_64BIT)
16756 size = 8;
16757 else
16758 size = 4;
16759 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16760 to a pop or add instruction if registers are available. */
16761 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16762 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16763 GEN_INT (size))));
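/* Editor's note (illustrative only): on ia32 without a red zone, freeing the
   4-byte slot allocated by ix86_force_to_memory emits roughly

     (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 4)))

   which is printed as an LEA so the flags are left untouched; a later
   peephole2 may rewrite it as a pop into a dead register or as a plain add
   once the flags are known to be dead.  */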
16767 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16768 QImode must go into class Q_REGS.
16769 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16770 movdf to do mem-to-mem moves through integer regs. */
16771 enum reg_class
16772 ix86_preferred_reload_class (rtx x, enum reg_class class)
16774 enum machine_mode mode = GET_MODE (x);
16776 /* We're only allowed to return a subclass of CLASS. Many of the
16777 following checks fail for NO_REGS, so eliminate that early. */
16778 if (class == NO_REGS)
16779 return NO_REGS;
16781 /* All classes can load zeros. */
16782 if (x == CONST0_RTX (mode))
16783 return class;
16785 /* Force constants into memory if we are loading a (nonzero) constant into
16786 an MMX or SSE register. This is because there are no MMX/SSE instructions
16787 to load from a constant. */
16788 if (CONSTANT_P (x)
16789 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16790 return NO_REGS;
16792 /* Prefer SSE regs only, if we can use them for math. */
16793 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16794 return SSE_CLASS_P (class) ? class : NO_REGS;
16796 /* Floating-point constants need more complex checks. */
16797 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16799 /* General regs can load everything. */
16800 if (reg_class_subset_p (class, GENERAL_REGS))
16801 return class;
16803 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16804 zero above. We only want to wind up preferring 80387 registers if
16805 we plan on doing computation with them. */
16806 if (TARGET_80387
16807 && standard_80387_constant_p (x))
16809 /* Limit class to non-sse. */
16810 if (class == FLOAT_SSE_REGS)
16811 return FLOAT_REGS;
16812 if (class == FP_TOP_SSE_REGS)
16813 return FP_TOP_REG;
16814 if (class == FP_SECOND_SSE_REGS)
16815 return FP_SECOND_REG;
16816 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16817 return class;
16820 return NO_REGS;
16823 /* Generally when we see PLUS here, it's the function invariant
16824 (plus soft-fp const_int), which can only be computed into general
16825 regs. */
16826 if (GET_CODE (x) == PLUS)
16827 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16829 /* QImode constants are easy to load, but non-constant QImode data
16830 must go into Q_REGS. */
16831 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16833 if (reg_class_subset_p (class, Q_REGS))
16834 return class;
16835 if (reg_class_subset_p (Q_REGS, class))
16836 return Q_REGS;
16837 return NO_REGS;
16840 return class;
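/* Editor's note (illustrative only): a worked example of the checks above.
   Reloading a nonzero V4SFmode constant vector into an SSE class hits the
   CONSTANT_P test and yields NO_REGS, so reload places the constant in the
   literal pool and loads it with an ordinary memory move instead of trying
   to synthesize it in a register.  Likewise, a non-constant QImode pseudo
   asked for ALL_REGS is narrowed to Q_REGS by the QImode check.  */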
16843 /* Discourage putting floating-point values in SSE registers unless
16844 SSE math is being used, and likewise for the 387 registers. */
16845 enum reg_class
16846 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16848 enum machine_mode mode = GET_MODE (x);
16850 /* Restrict the output reload class to the register bank that we are doing
16851 math on. If we would like not to return a subset of CLASS, reject this
16852 alternative: if reload cannot do this, it will still use its choice. */
16853 mode = GET_MODE (x);
16854 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16855 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16857 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16859 if (class == FP_TOP_SSE_REGS)
16860 return FP_TOP_REG;
16861 else if (class == FP_SECOND_SSE_REGS)
16862 return FP_SECOND_REG;
16863 else
16864 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16867 return class;
16870 /* If we are copying between general and FP registers, we need a memory
16871 location. The same is true for SSE and MMX registers.
16873 The macro can't work reliably when one of the CLASSES is a class containing
16874 registers from multiple units (SSE, MMX, integer). We avoid this by never
16875 combining those units in a single alternative in the machine description.
16876 Ensure that this constraint holds to avoid unexpected surprises.
16878 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16879 enforce these sanity checks. */
16882 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16883 enum machine_mode mode, int strict)
16885 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16886 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16887 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16888 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16889 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16890 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16892 gcc_assert (!strict);
16893 return true;
16896 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16897 return true;
16899 /* ??? This is a lie. We do have moves between mmx/general, and between
16900 mmx/sse2. But by saying we need secondary memory we discourage the
16901 register allocator from using the mmx registers unless needed. */
16902 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16903 return true;
16905 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16907 /* SSE1 doesn't have any direct moves from other classes. */
16908 if (!TARGET_SSE2)
16909 return true;
16911 /* If the target says that inter-unit moves are more expensive
16912 than moving through memory, then don't generate them. */
16913 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16914 return true;
16916 /* Between SSE and general, we have moves no larger than word size. */
16917 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16918 return true;
16920 /* ??? For the cost of one register reformat penalty, we could use
16921 the same instructions to move SFmode and DFmode data, but the
16922 relevant move patterns don't support those alternatives. */
16923 if (mode == SFmode || mode == DFmode)
16924 return true;
16927 return false;
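/* Editor's note (illustrative only): for example, copying a DFmode value
   between GENERAL_REGS and SSE_REGS on ia32 answers true here (the mode is
   wider than a word, and the SFmode/DFmode check applies as well), so reload
   routes the copy through a stack slot; the cost of that round trip is what
   ix86_register_move_cost charges below.  */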
16930 /* Return true if the registers in CLASS cannot represent the change from
16931 modes FROM to TO. */
16933 bool
16934 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16935 enum reg_class class)
16937 if (from == to)
16938 return false;
16940 /* x87 registers can't do subreg at all, as all values are reformatted
16941 to extended precision. */
16942 if (MAYBE_FLOAT_CLASS_P (class))
16943 return true;
16945 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16947 /* Vector registers do not support QI or HImode loads. If we don't
16948 disallow a change to these modes, reload will assume it's ok to
16949 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16950 the vec_dupv4hi pattern. */
16951 if (GET_MODE_SIZE (from) < 4)
16952 return true;
16954 /* Vector registers do not support subreg with nonzero offsets, which
16955 are otherwise valid for integer registers. Since we can't see
16956 whether we have a nonzero offset from here, prohibit all
16957 nonparadoxical subregs changing size. */
16958 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16959 return true;
16962 return false;
16965 /* Return the cost of moving data from a register in class CLASS1 to
16966 one in class CLASS2.
16968 It is not required that the cost always equal 2 when FROM is the same as TO;
16969 on some machines it is expensive to move between registers if they are not
16970 general registers. */
16973 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16974 enum reg_class class2)
16976 /* In case we require secondary memory, compute cost of the store followed
16977 by load. In order to avoid bad register allocation choices, we need
16978 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16980 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16982 int cost = 1;
16984 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16985 MEMORY_MOVE_COST (mode, class1, 1));
16986 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16987 MEMORY_MOVE_COST (mode, class2, 1));
16989 /* In the case of copying from a general purpose register we may emit multiple
16990 stores followed by a single load, causing a memory size mismatch stall.
16991 Count this as an arbitrarily high cost of 20. */
16992 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16993 cost += 20;
16995 /* In the case of FP/MMX moves, the registers actually overlap, and we
16996 have to switch modes in order to treat them differently. */
16997 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16998 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16999 cost += 20;
17001 return cost;
17004 /* Moves between SSE/MMX and integer unit are expensive. */
17005 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17006 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17007 return ix86_cost->mmxsse_to_integer;
17008 if (MAYBE_FLOAT_CLASS_P (class1))
17009 return ix86_cost->fp_move;
17010 if (MAYBE_SSE_CLASS_P (class1))
17011 return ix86_cost->sse_move;
17012 if (MAYBE_MMX_CLASS_P (class1))
17013 return ix86_cost->mmx_move;
17014 return 2;
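/* Editor's note (illustrative only, with made-up cost numbers): if a copy
   needs secondary memory and the selected tuning charges 4 for both the load
   and the store in each class, the base cost computed above is
   1 + max (4, 4) + max (4, 4) = 9; each applicable penalty (more stores than
   loads, or an FP/MMX register overlap) then adds another 20 on top.  */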
17017 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17019 bool
17020 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17022 /* Flags and only flags can only hold CCmode values. */
17023 if (CC_REGNO_P (regno))
17024 return GET_MODE_CLASS (mode) == MODE_CC;
17025 if (GET_MODE_CLASS (mode) == MODE_CC
17026 || GET_MODE_CLASS (mode) == MODE_RANDOM
17027 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17028 return 0;
17029 if (FP_REGNO_P (regno))
17030 return VALID_FP_MODE_P (mode);
17031 if (SSE_REGNO_P (regno))
17033 /* We implement the move patterns for all vector modes into and
17034 out of SSE registers, even when no operation instructions
17035 are available. */
17036 return (VALID_SSE_REG_MODE (mode)
17037 || VALID_SSE2_REG_MODE (mode)
17038 || VALID_MMX_REG_MODE (mode)
17039 || VALID_MMX_REG_MODE_3DNOW (mode));
17041 if (MMX_REGNO_P (regno))
17043 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17044 so if the register is available at all, then we can move data of
17045 the given mode into or out of it. */
17046 return (VALID_MMX_REG_MODE (mode)
17047 || VALID_MMX_REG_MODE_3DNOW (mode));
17050 if (mode == QImode)
17052 /* Take care for QImode values - they can be in non-QI regs,
17053 but then they do cause partial register stalls. */
17054 if (regno < 4 || TARGET_64BIT)
17055 return 1;
17056 if (!TARGET_PARTIAL_REG_STALL)
17057 return 1;
17058 return reload_in_progress || reload_completed;
17060 /* We handle both integers and floats in the general purpose registers. */
17061 else if (VALID_INT_MODE_P (mode))
17062 return 1;
17063 else if (VALID_FP_MODE_P (mode))
17064 return 1;
17065 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17066 on to use that value in smaller contexts, this can easily force a
17067 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17068 supporting DImode, allow it. */
17069 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17070 return 1;
17072 return 0;
17075 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17076 tieable integer mode. */
17078 static bool
17079 ix86_tieable_integer_mode_p (enum machine_mode mode)
17081 switch (mode)
17083 case HImode:
17084 case SImode:
17085 return true;
17087 case QImode:
17088 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17090 case DImode:
17091 return TARGET_64BIT;
17093 default:
17094 return false;
17098 /* Return true if MODE1 is accessible in a register that can hold MODE2
17099 without copying. That is, all register classes that can hold MODE2
17100 can also hold MODE1. */
17102 bool
17103 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17105 if (mode1 == mode2)
17106 return true;
17108 if (ix86_tieable_integer_mode_p (mode1)
17109 && ix86_tieable_integer_mode_p (mode2))
17110 return true;
17112 /* MODE2 being XFmode implies fp stack or general regs, which means we
17113 can tie any smaller floating point modes to it. Note that we do not
17114 tie this with TFmode. */
17115 if (mode2 == XFmode)
17116 return mode1 == SFmode || mode1 == DFmode;
17118 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17119 that we can tie it with SFmode. */
17120 if (mode2 == DFmode)
17121 return mode1 == SFmode;
17123 /* If MODE2 is only appropriate for an SSE register, then tie with
17124 any other mode acceptable to SSE registers. */
17125 if (GET_MODE_SIZE (mode2) >= 8
17126 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17127 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17129 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17130 with any other mode acceptable to MMX registers. */
17131 if (GET_MODE_SIZE (mode2) == 8
17132 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17133 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17135 return false;
17138 /* Return the cost of moving data of mode M between a
17139 register and memory. A value of 2 is the default; this cost is
17140 relative to those in `REGISTER_MOVE_COST'.
17142 If moving between registers and memory is more expensive than
17143 between two registers, you should define this macro to express the
17144 relative cost.
17146 Also model the increased cost of moving QImode registers in
17147 non-Q_REGS classes. */
17150 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17152 if (FLOAT_CLASS_P (class))
17154 int index;
17155 switch (mode)
17157 case SFmode:
17158 index = 0;
17159 break;
17160 case DFmode:
17161 index = 1;
17162 break;
17163 case XFmode:
17164 index = 2;
17165 break;
17166 default:
17167 return 100;
17169 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17171 if (SSE_CLASS_P (class))
17173 int index;
17174 switch (GET_MODE_SIZE (mode))
17176 case 4:
17177 index = 0;
17178 break;
17179 case 8:
17180 index = 1;
17181 break;
17182 case 16:
17183 index = 2;
17184 break;
17185 default:
17186 return 100;
17188 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17190 if (MMX_CLASS_P (class))
17192 int index;
17193 switch (GET_MODE_SIZE (mode))
17195 case 4:
17196 index = 0;
17197 break;
17198 case 8:
17199 index = 1;
17200 break;
17201 default:
17202 return 100;
17204 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17206 switch (GET_MODE_SIZE (mode))
17208 case 1:
17209 if (in)
17210 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17211 : ix86_cost->movzbl_load);
17212 else
17213 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17214 : ix86_cost->int_store[0] + 4);
17215 break;
17216 case 2:
17217 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17218 default:
17219 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17220 if (mode == TFmode)
17221 mode = XFmode;
17222 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17223 * (((int) GET_MODE_SIZE (mode)
17224 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
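/* Editor's note (illustrative only): for the final integer case above, a
   12-byte mode loaded into GENERAL_REGS on ia32 (UNITS_PER_WORD == 4) is
   costed as (12 + 3) / 4 == 3 word-sized moves, i.e. 3 * int_load[2].
   Loading a single byte into a non-Q class is charged movzbl_load rather
   than int_load[0], since only the movzbl form is available there.  */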
17228 /* Compute a (partial) cost for rtx X. Return true if the complete
17229 cost has been computed, and false if subexpressions should be
17230 scanned. In either case, *TOTAL contains the cost result. */
17232 static bool
17233 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17235 enum machine_mode mode = GET_MODE (x);
17237 switch (code)
17239 case CONST_INT:
17240 case CONST:
17241 case LABEL_REF:
17242 case SYMBOL_REF:
17243 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17244 *total = 3;
17245 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17246 *total = 2;
17247 else if (flag_pic && SYMBOLIC_CONST (x)
17248 && (!TARGET_64BIT
17249 || (GET_CODE (x) != LABEL_REF
17250 && (GET_CODE (x) != SYMBOL_REF
17251 || !SYMBOL_REF_LOCAL_P (x)))))
17252 *total = 1;
17253 else
17254 *total = 0;
17255 return true;
17257 case CONST_DOUBLE:
17258 if (mode == VOIDmode)
17259 *total = 0;
17260 else
17261 switch (standard_80387_constant_p (x))
17263 case 1: /* 0.0 */
17264 *total = 1;
17265 break;
17266 default: /* Other constants */
17267 *total = 2;
17268 break;
17269 case 0:
17270 case -1:
17271 /* Start with (MEM (SYMBOL_REF)), since that's where
17272 it'll probably end up. Add a penalty for size. */
17273 *total = (COSTS_N_INSNS (1)
17274 + (flag_pic != 0 && !TARGET_64BIT)
17275 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17276 break;
17278 return true;
17280 case ZERO_EXTEND:
17281 /* Zero extension is often completely free on x86_64, so make
17282 it as cheap as possible. */
17283 if (TARGET_64BIT && mode == DImode
17284 && GET_MODE (XEXP (x, 0)) == SImode)
17285 *total = 1;
17286 else if (TARGET_ZERO_EXTEND_WITH_AND)
17287 *total = ix86_cost->add;
17288 else
17289 *total = ix86_cost->movzx;
17290 return false;
17292 case SIGN_EXTEND:
17293 *total = ix86_cost->movsx;
17294 return false;
17296 case ASHIFT:
17297 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17298 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17300 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17301 if (value == 1)
17303 *total = ix86_cost->add;
17304 return false;
17306 if ((value == 2 || value == 3)
17307 && ix86_cost->lea <= ix86_cost->shift_const)
17309 *total = ix86_cost->lea;
17310 return false;
17313 /* FALLTHRU */
17315 case ROTATE:
17316 case ASHIFTRT:
17317 case LSHIFTRT:
17318 case ROTATERT:
17319 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17321 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17323 if (INTVAL (XEXP (x, 1)) > 32)
17324 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17325 else
17326 *total = ix86_cost->shift_const * 2;
17328 else
17330 if (GET_CODE (XEXP (x, 1)) == AND)
17331 *total = ix86_cost->shift_var * 2;
17332 else
17333 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17336 else
17338 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17339 *total = ix86_cost->shift_const;
17340 else
17341 *total = ix86_cost->shift_var;
17343 return false;
17345 case MULT:
17346 if (FLOAT_MODE_P (mode))
17348 *total = ix86_cost->fmul;
17349 return false;
17351 else
17353 rtx op0 = XEXP (x, 0);
17354 rtx op1 = XEXP (x, 1);
17355 int nbits;
17356 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17358 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17359 for (nbits = 0; value != 0; value &= value - 1)
17360 nbits++;
17362 else
17363 /* This is arbitrary. */
17364 nbits = 7;
17366 /* Compute costs correctly for widening multiplication. */
17367 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17368 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17369 == GET_MODE_SIZE (mode))
17371 int is_mulwiden = 0;
17372 enum machine_mode inner_mode = GET_MODE (op0);
17374 if (GET_CODE (op0) == GET_CODE (op1))
17375 is_mulwiden = 1, op1 = XEXP (op1, 0);
17376 else if (GET_CODE (op1) == CONST_INT)
17378 if (GET_CODE (op0) == SIGN_EXTEND)
17379 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17380 == INTVAL (op1);
17381 else
17382 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17385 if (is_mulwiden)
17386 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17389 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17390 + nbits * ix86_cost->mult_bit
17391 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17393 return true;
17396 case DIV:
17397 case UDIV:
17398 case MOD:
17399 case UMOD:
17400 if (FLOAT_MODE_P (mode))
17401 *total = ix86_cost->fdiv;
17402 else
17403 *total = ix86_cost->divide[MODE_INDEX (mode)];
17404 return false;
17406 case PLUS:
17407 if (FLOAT_MODE_P (mode))
17408 *total = ix86_cost->fadd;
17409 else if (GET_MODE_CLASS (mode) == MODE_INT
17410 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17412 if (GET_CODE (XEXP (x, 0)) == PLUS
17413 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17414 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17415 && CONSTANT_P (XEXP (x, 1)))
17417 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17418 if (val == 2 || val == 4 || val == 8)
17420 *total = ix86_cost->lea;
17421 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17422 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17423 outer_code);
17424 *total += rtx_cost (XEXP (x, 1), outer_code);
17425 return true;
17428 else if (GET_CODE (XEXP (x, 0)) == MULT
17429 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17431 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17432 if (val == 2 || val == 4 || val == 8)
17434 *total = ix86_cost->lea;
17435 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17436 *total += rtx_cost (XEXP (x, 1), outer_code);
17437 return true;
17440 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17442 *total = ix86_cost->lea;
17443 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17444 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17445 *total += rtx_cost (XEXP (x, 1), outer_code);
17446 return true;
17449 /* FALLTHRU */
17451 case MINUS:
17452 if (FLOAT_MODE_P (mode))
17454 *total = ix86_cost->fadd;
17455 return false;
17457 /* FALLTHRU */
17459 case AND:
17460 case IOR:
17461 case XOR:
17462 if (!TARGET_64BIT && mode == DImode)
17464 *total = (ix86_cost->add * 2
17465 + (rtx_cost (XEXP (x, 0), outer_code)
17466 << (GET_MODE (XEXP (x, 0)) != DImode))
17467 + (rtx_cost (XEXP (x, 1), outer_code)
17468 << (GET_MODE (XEXP (x, 1)) != DImode)));
17469 return true;
17471 /* FALLTHRU */
17473 case NEG:
17474 if (FLOAT_MODE_P (mode))
17476 *total = ix86_cost->fchs;
17477 return false;
17479 /* FALLTHRU */
17481 case NOT:
17482 if (!TARGET_64BIT && mode == DImode)
17483 *total = ix86_cost->add * 2;
17484 else
17485 *total = ix86_cost->add;
17486 return false;
17488 case COMPARE:
17489 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17490 && XEXP (XEXP (x, 0), 1) == const1_rtx
17491 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17492 && XEXP (x, 1) == const0_rtx)
17494 /* This kind of construct is implemented using test[bwl].
17495 Treat it as if we had an AND. */
17496 *total = (ix86_cost->add
17497 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17498 + rtx_cost (const1_rtx, outer_code));
17499 return true;
17501 return false;
17503 case FLOAT_EXTEND:
17504 if (!TARGET_SSE_MATH
17505 || mode == XFmode
17506 || (mode == DFmode && !TARGET_SSE2))
17507 /* For standard 80387 constants, raise the cost to prevent
17508 compress_float_constant() from generating a load from memory. */
17509 switch (standard_80387_constant_p (XEXP (x, 0)))
17511 case -1:
17512 case 0:
17513 *total = 0;
17514 break;
17515 case 1: /* 0.0 */
17516 *total = 1;
17517 break;
17518 default:
17519 *total = (x86_ext_80387_constants & TUNEMASK
17520 || optimize_size
17521 ? 1 : 0);
17523 return false;
17525 case ABS:
17526 if (FLOAT_MODE_P (mode))
17527 *total = ix86_cost->fabs;
17528 return false;
17530 case SQRT:
17531 if (FLOAT_MODE_P (mode))
17532 *total = ix86_cost->fsqrt;
17533 return false;
17535 case UNSPEC:
17536 if (XINT (x, 1) == UNSPEC_TP)
17537 *total = 0;
17538 return false;
17540 default:
17541 return false;
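/* Editor's note (illustrative worked example): for
   (mult:SI (reg:SI) (const_int 20)) the MULT case above counts the bits set
   in 20 (binary 10100, so nbits == 2) and returns
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the costs of the two
   operands; a non-constant multiplier falls back to the arbitrary nbits == 7.  */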
17545 #if TARGET_MACHO
17547 static int current_machopic_label_num;
17549 /* Given a symbol name and its associated stub, write out the
17550 definition of the stub. */
17552 void
17553 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17555 unsigned int length;
17556 char *binder_name, *symbol_name, lazy_ptr_name[32];
17557 int label = ++current_machopic_label_num;
17559 /* For 64-bit we shouldn't get here. */
17560 gcc_assert (!TARGET_64BIT);
17562 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17563 symb = (*targetm.strip_name_encoding) (symb);
17565 length = strlen (stub);
17566 binder_name = alloca (length + 32);
17567 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17569 length = strlen (symb);
17570 symbol_name = alloca (length + 32);
17571 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17573 sprintf (lazy_ptr_name, "L%d$lz", label);
17575 if (MACHOPIC_PURE)
17576 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17577 else
17578 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17580 fprintf (file, "%s:\n", stub);
17581 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17583 if (MACHOPIC_PURE)
17585 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17586 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17587 fprintf (file, "\tjmp\t*%%edx\n");
17589 else
17590 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17592 fprintf (file, "%s:\n", binder_name);
17594 if (MACHOPIC_PURE)
17596 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17597 fprintf (file, "\tpushl\t%%eax\n");
17599 else
17600 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17602 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17604 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17605 fprintf (file, "%s:\n", lazy_ptr_name);
17606 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17607 fprintf (file, "\t.long %s\n", binder_name);
17610 void
17611 darwin_x86_file_end (void)
17613 darwin_file_end ();
17614 ix86_file_end ();
17616 #endif /* TARGET_MACHO */
17618 /* Order the registers for register allocator. */
17620 void
17621 x86_order_regs_for_local_alloc (void)
17623 int pos = 0;
17624 int i;
17626 /* First allocate the local general purpose registers. */
17627 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17628 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17629 reg_alloc_order [pos++] = i;
17631 /* Global general purpose registers. */
17632 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17633 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17634 reg_alloc_order [pos++] = i;
17636 /* x87 registers come first in case we are doing FP math
17637 using them. */
17638 if (!TARGET_SSE_MATH)
17639 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17640 reg_alloc_order [pos++] = i;
17642 /* SSE registers. */
17643 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17644 reg_alloc_order [pos++] = i;
17645 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17646 reg_alloc_order [pos++] = i;
17648 /* x87 registers. */
17649 if (TARGET_SSE_MATH)
17650 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17651 reg_alloc_order [pos++] = i;
17653 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17654 reg_alloc_order [pos++] = i;
17656 /* Initialize the rest of the array, as we do not allocate some registers
17657 at all. */
17658 while (pos < FIRST_PSEUDO_REGISTER)
17659 reg_alloc_order [pos++] = 0;
17662 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17663 struct attribute_spec.handler. */
17664 static tree
17665 ix86_handle_struct_attribute (tree *node, tree name,
17666 tree args ATTRIBUTE_UNUSED,
17667 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17669 tree *type = NULL;
17670 if (DECL_P (*node))
17672 if (TREE_CODE (*node) == TYPE_DECL)
17673 type = &TREE_TYPE (*node);
17675 else
17676 type = node;
17678 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17679 || TREE_CODE (*type) == UNION_TYPE)))
17681 warning (OPT_Wattributes, "%qs attribute ignored",
17682 IDENTIFIER_POINTER (name));
17683 *no_add_attrs = true;
17686 else if ((is_attribute_p ("ms_struct", name)
17687 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17688 || ((is_attribute_p ("gcc_struct", name)
17689 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17691 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17692 IDENTIFIER_POINTER (name));
17693 *no_add_attrs = true;
17696 return NULL_TREE;
17699 static bool
17700 ix86_ms_bitfield_layout_p (tree record_type)
17702 return (TARGET_MS_BITFIELD_LAYOUT &&
17703 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17704 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17707 /* Returns an expression indicating where the this parameter is
17708 located on entry to the FUNCTION. */
17710 static rtx
17711 x86_this_parameter (tree function)
17713 tree type = TREE_TYPE (function);
17715 if (TARGET_64BIT)
17717 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17718 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17721 if (ix86_function_regparm (type, function) > 0)
17723 tree parm;
17725 parm = TYPE_ARG_TYPES (type);
17726 /* Figure out whether or not the function has a variable number of
17727 arguments. */
17728 for (; parm; parm = TREE_CHAIN (parm))
17729 if (TREE_VALUE (parm) == void_type_node)
17730 break;
17731 /* If not, the this parameter is in the first argument. */
17732 if (parm)
17734 int regno = 0;
17735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17736 regno = 2;
17737 return gen_rtx_REG (SImode, regno);
17741 if (aggregate_value_p (TREE_TYPE (type), type))
17742 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17743 else
17744 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
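/* Editor's note (illustrative only): concretely, for an ordinary ia32 method
   THIS is found at 4(%esp) on entry, or at 8(%esp) when a hidden aggregate
   return pointer occupies the first slot; with -mregparm it arrives in %eax,
   with fastcall in %ecx, and on x86_64 in %rdi (%rsi when there is an
   aggregate return pointer).  */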
17747 /* Determine whether x86_output_mi_thunk can succeed. */
17749 static bool
17750 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17751 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17752 HOST_WIDE_INT vcall_offset, tree function)
17754 /* 64-bit can handle anything. */
17755 if (TARGET_64BIT)
17756 return true;
17758 /* For 32-bit, everything's fine if we have one free register. */
17759 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17760 return true;
17762 /* Need a free register for vcall_offset. */
17763 if (vcall_offset)
17764 return false;
17766 /* Need a free register for GOT references. */
17767 if (flag_pic && !(*targetm.binds_local_p) (function))
17768 return false;
17770 /* Otherwise ok. */
17771 return true;
17774 /* Output the assembler code for a thunk function. THUNK_DECL is the
17775 declaration for the thunk function itself, FUNCTION is the decl for
17776 the target function. DELTA is an immediate constant offset to be
17777 added to THIS. If VCALL_OFFSET is nonzero, the word at
17778 *(*this + vcall_offset) should be added to THIS. */
17780 static void
17781 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17782 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17783 HOST_WIDE_INT vcall_offset, tree function)
17785 rtx xops[3];
17786 rtx this = x86_this_parameter (function);
17787 rtx this_reg, tmp;
17789 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17790 pull it in now and let DELTA benefit. */
17791 if (REG_P (this))
17792 this_reg = this;
17793 else if (vcall_offset)
17795 /* Put the this parameter into %eax. */
17796 xops[0] = this;
17797 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17798 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17800 else
17801 this_reg = NULL_RTX;
17803 /* Adjust the this parameter by a fixed constant. */
17804 if (delta)
17806 xops[0] = GEN_INT (delta);
17807 xops[1] = this_reg ? this_reg : this;
17808 if (TARGET_64BIT)
17810 if (!x86_64_general_operand (xops[0], DImode))
17812 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17813 xops[1] = tmp;
17814 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17815 xops[0] = tmp;
17816 xops[1] = this;
17818 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17820 else
17821 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17824 /* Adjust the this parameter by a value stored in the vtable. */
17825 if (vcall_offset)
17827 if (TARGET_64BIT)
17828 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17829 else
17831 int tmp_regno = 2 /* ECX */;
17832 if (lookup_attribute ("fastcall",
17833 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17834 tmp_regno = 0 /* EAX */;
17835 tmp = gen_rtx_REG (SImode, tmp_regno);
17838 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17839 xops[1] = tmp;
17840 if (TARGET_64BIT)
17841 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17842 else
17843 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17845 /* Adjust the this parameter. */
17846 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17847 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17849 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17850 xops[0] = GEN_INT (vcall_offset);
17851 xops[1] = tmp2;
17852 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17853 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17855 xops[1] = this_reg;
17856 if (TARGET_64BIT)
17857 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17858 else
17859 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17862 /* If necessary, drop THIS back to its stack slot. */
17863 if (this_reg && this_reg != this)
17865 xops[0] = this_reg;
17866 xops[1] = this;
17867 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17870 xops[0] = XEXP (DECL_RTL (function), 0);
17871 if (TARGET_64BIT)
17873 if (!flag_pic || (*targetm.binds_local_p) (function))
17874 output_asm_insn ("jmp\t%P0", xops);
17875 else
17877 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17878 tmp = gen_rtx_CONST (Pmode, tmp);
17879 tmp = gen_rtx_MEM (QImode, tmp);
17880 xops[0] = tmp;
17881 output_asm_insn ("jmp\t%A0", xops);
17884 else
17886 if (!flag_pic || (*targetm.binds_local_p) (function))
17887 output_asm_insn ("jmp\t%P0", xops);
17888 else
17889 #if TARGET_MACHO
17890 if (TARGET_MACHO)
17892 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17893 tmp = (gen_rtx_SYMBOL_REF
17894 (Pmode,
17895 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17896 tmp = gen_rtx_MEM (QImode, tmp);
17897 xops[0] = tmp;
17898 output_asm_insn ("jmp\t%0", xops);
17900 else
17901 #endif /* TARGET_MACHO */
17903 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17904 output_set_got (tmp, NULL_RTX);
17906 xops[1] = tmp;
17907 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17908 output_asm_insn ("jmp\t{*}%1", xops);
17913 static void
17914 x86_file_start (void)
17916 default_file_start ();
17917 #if TARGET_MACHO
17918 darwin_file_start ();
17919 #endif
17920 if (X86_FILE_START_VERSION_DIRECTIVE)
17921 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17922 if (X86_FILE_START_FLTUSED)
17923 fputs ("\t.global\t__fltused\n", asm_out_file);
17924 if (ix86_asm_dialect == ASM_INTEL)
17925 fputs ("\t.intel_syntax\n", asm_out_file);
17929 x86_field_alignment (tree field, int computed)
17931 enum machine_mode mode;
17932 tree type = TREE_TYPE (field);
17934 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17935 return computed;
17936 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17937 ? get_inner_array_type (type) : type);
17938 if (mode == DFmode || mode == DCmode
17939 || GET_MODE_CLASS (mode) == MODE_INT
17940 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17941 return MIN (32, computed);
17942 return computed;
17945 /* Output assembler code to FILE to increment profiler label # LABELNO
17946 for profiling a function entry. */
17947 void
17948 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17950 if (TARGET_64BIT)
17951 if (flag_pic)
17953 #ifndef NO_PROFILE_COUNTERS
17954 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17955 #endif
17956 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17958 else
17960 #ifndef NO_PROFILE_COUNTERS
17961 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17962 #endif
17963 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17965 else if (flag_pic)
17967 #ifndef NO_PROFILE_COUNTERS
17968 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17969 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17970 #endif
17971 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17973 else
17975 #ifndef NO_PROFILE_COUNTERS
17976 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17977 PROFILE_COUNT_REGISTER);
17978 #endif
17979 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17983 /* We don't have exact information about the insn sizes, but we may assume
17984 quite safely that we are informed about all 1 byte insns and memory
17985 address sizes. This is enough to eliminate unnecessary padding in
17986 99% of cases. */
17988 static int
17989 min_insn_size (rtx insn)
17991 int l = 0;
17993 if (!INSN_P (insn) || !active_insn_p (insn))
17994 return 0;
17996 /* Discard alignments we've emitted, and jump instructions. */
17997 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17998 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17999 return 0;
18000 if (GET_CODE (insn) == JUMP_INSN
18001 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18002 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18003 return 0;
18005 /* Important case - calls are always 5 bytes.
18006 It is common to have many calls in a row. */
18007 if (GET_CODE (insn) == CALL_INSN
18008 && symbolic_reference_mentioned_p (PATTERN (insn))
18009 && !SIBLING_CALL_P (insn))
18010 return 5;
18011 if (get_attr_length (insn) <= 1)
18012 return 1;
18014 /* For normal instructions we may rely on the sizes of addresses
18015 and the presence of a symbol to require 4 bytes of encoding.
18016 This is not the case for jumps, where references are PC-relative.
18017 if (GET_CODE (insn) != JUMP_INSN)
18019 l = get_attr_length_address (insn);
18020 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18021 l = 4;
18023 if (l)
18024 return 1+l;
18025 else
18026 return 2;
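/* Editor's note (illustrative only): with the estimates above, a direct call
   to a named function counts as 5 bytes, a single one-byte insn as 1, and an
   insn whose address needs 3 bytes but also mentions a symbol is rounded up
   to an address length of 4, giving 1 + 4 = 5.  The values only need to be
   safe estimates for the padding computation that follows.  */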
18029 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18030 window. */
18032 static void
18033 ix86_avoid_jump_misspredicts (void)
18035 rtx insn, start = get_insns ();
18036 int nbytes = 0, njumps = 0;
18037 int isjump = 0;
18039 /* Look for all minimal intervals of instructions containing 4 jumps.
18040 The intervals are bounded by START and INSN. NBYTES is the total
18041 size of instructions in the interval including INSN and not including
18042 START. When NBYTES is smaller than 16 bytes, it is possible
18043 that the end of START and INSN end up in the same 16-byte page.
18045 The smallest offset in the page at which INSN can start is the case where
18046 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
18047 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
18049 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18052 nbytes += min_insn_size (insn);
18053 if (dump_file)
18054 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18055 INSN_UID (insn), min_insn_size (insn));
18056 if ((GET_CODE (insn) == JUMP_INSN
18057 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18058 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18059 || GET_CODE (insn) == CALL_INSN)
18060 njumps++;
18061 else
18062 continue;
18064 while (njumps > 3)
18066 start = NEXT_INSN (start);
18067 if ((GET_CODE (start) == JUMP_INSN
18068 && GET_CODE (PATTERN (start)) != ADDR_VEC
18069 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18070 || GET_CODE (start) == CALL_INSN)
18071 njumps--, isjump = 1;
18072 else
18073 isjump = 0;
18074 nbytes -= min_insn_size (start);
18076 gcc_assert (njumps >= 0);
18077 if (dump_file)
18078 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18079 INSN_UID (start), INSN_UID (insn), nbytes);
18081 if (njumps == 3 && isjump && nbytes < 16)
18083 int padsize = 15 - nbytes + min_insn_size (insn);
18085 if (dump_file)
18086 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18087 INSN_UID (insn), padsize);
18088 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
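/* Editor's note (illustrative worked example): when four branches would fall
   into fewer than 16 bytes, the loop above emits an align of
   padsize = 15 - nbytes + min_insn_size (insn); for instance, with
   nbytes == 12 and a 2-byte INSN that is 15 - 12 + 2 == 5 bytes of padding,
   which pushes the last branch out of the shared 16-byte fetch block.  */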
18093 /* AMD Athlon works faster
18094 when RET is not the destination of a conditional jump nor directly preceded
18095 by another jump instruction. We avoid the penalty by inserting a NOP just
18096 before the RET instructions in such cases. */
18097 static void
18098 ix86_pad_returns (void)
18100 edge e;
18101 edge_iterator ei;
18103 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18105 basic_block bb = e->src;
18106 rtx ret = BB_END (bb);
18107 rtx prev;
18108 bool replace = false;
18110 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18111 || !maybe_hot_bb_p (bb))
18112 continue;
18113 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18114 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18115 break;
18116 if (prev && GET_CODE (prev) == CODE_LABEL)
18118 edge e;
18119 edge_iterator ei;
18121 FOR_EACH_EDGE (e, ei, bb->preds)
18122 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18123 && !(e->flags & EDGE_FALLTHRU))
18124 replace = true;
18126 if (!replace)
18128 prev = prev_active_insn (ret);
18129 if (prev
18130 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18131 || GET_CODE (prev) == CALL_INSN))
18132 replace = true;
18133 /* Empty functions suffer a branch misprediction even when the jump destination
18134 is not visible to us. */
18135 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18136 replace = true;
18138 if (replace)
18140 emit_insn_before (gen_return_internal_long (), ret);
18141 delete_insn (ret);
18146 /* Implement machine-specific optimizations. We implement padding of returns
18147 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
18148 static void
18149 ix86_reorg (void)
18151 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18152 ix86_pad_returns ();
18153 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18154 ix86_avoid_jump_misspredicts ();
18157 /* Return nonzero when a QImode register that must be represented via a REX
18158 prefix is used. */
18159 bool
18160 x86_extended_QIreg_mentioned_p (rtx insn)
18162 int i;
18163 extract_insn_cached (insn);
18164 for (i = 0; i < recog_data.n_operands; i++)
18165 if (REG_P (recog_data.operand[i])
18166 && REGNO (recog_data.operand[i]) >= 4)
18167 return true;
18168 return false;
18171 /* Return nonzero when P points to a register encoded via a REX prefix.
18172 Called via for_each_rtx. */
18173 static int
18174 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18176 unsigned int regno;
18177 if (!REG_P (*p))
18178 return 0;
18179 regno = REGNO (*p);
18180 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18183 /* Return true when INSN mentions a register that must be encoded using a REX
18184 prefix. */
18185 bool
18186 x86_extended_reg_mentioned_p (rtx insn)
18188 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18191 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18192 optabs would emit if we didn't have TFmode patterns. */
18194 void
18195 x86_emit_floatuns (rtx operands[2])
18197 rtx neglab, donelab, i0, i1, f0, in, out;
18198 enum machine_mode mode, inmode;
18200 inmode = GET_MODE (operands[1]);
18201 gcc_assert (inmode == SImode || inmode == DImode);
18203 out = operands[0];
18204 in = force_reg (inmode, operands[1]);
18205 mode = GET_MODE (out);
18206 neglab = gen_label_rtx ();
18207 donelab = gen_label_rtx ();
18208 i1 = gen_reg_rtx (Pmode);
18209 f0 = gen_reg_rtx (mode);
18211 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18213 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18214 emit_jump_insn (gen_jump (donelab));
18215 emit_barrier ();
18217 emit_label (neglab);
18219 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18220 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18221 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18222 expand_float (f0, i0, 0);
18223 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18225 emit_label (donelab);
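/* A minimal scalar sketch of the conversion emitted above, for a DImode
   input and a double result (the helper name is illustrative only, not
   part of this file).  Nonnegative inputs use the signed conversion
   directly; negative inputs are halved with the dropped bit folded back
   in, converted, and then doubled:

     double
     u64_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       double d = (double) (long long) half;
       return d + d;
     }
*/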
18228 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18229 with all elements equal to VAR. Return true if successful. */
18231 static bool
18232 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18233 rtx target, rtx val)
18235 enum machine_mode smode, wsmode, wvmode;
18236 rtx x;
18238 switch (mode)
18240 case V2SImode:
18241 case V2SFmode:
18242 if (!mmx_ok)
18243 return false;
18244 /* FALLTHRU */
18246 case V2DFmode:
18247 case V2DImode:
18248 case V4SFmode:
18249 case V4SImode:
18250 val = force_reg (GET_MODE_INNER (mode), val);
18251 x = gen_rtx_VEC_DUPLICATE (mode, val);
18252 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18253 return true;
18255 case V4HImode:
18256 if (!mmx_ok)
18257 return false;
18258 if (TARGET_SSE || TARGET_3DNOW_A)
18260 val = gen_lowpart (SImode, val);
18261 x = gen_rtx_TRUNCATE (HImode, val);
18262 x = gen_rtx_VEC_DUPLICATE (mode, x);
18263 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18264 return true;
18266 else
18268 smode = HImode;
18269 wsmode = SImode;
18270 wvmode = V2SImode;
18271 goto widen;
18274 case V8QImode:
18275 if (!mmx_ok)
18276 return false;
18277 smode = QImode;
18278 wsmode = HImode;
18279 wvmode = V4HImode;
18280 goto widen;
18281 case V8HImode:
18282 if (TARGET_SSE2)
18284 rtx tmp1, tmp2;
18285 /* Extend HImode to SImode using a paradoxical SUBREG. */
18286 tmp1 = gen_reg_rtx (SImode);
18287 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18288 /* Insert the SImode value as low element of V4SImode vector. */
18289 tmp2 = gen_reg_rtx (V4SImode);
18290 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18291 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18292 CONST0_RTX (V4SImode),
18293 const1_rtx);
18294 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18295 /* Cast the V4SImode vector back to a V8HImode vector. */
18296 tmp1 = gen_reg_rtx (V8HImode);
18297 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18298 /* Duplicate the low short through the whole low SImode word. */
18299 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18300 /* Cast the V8HImode vector back to a V4SImode vector. */
18301 tmp2 = gen_reg_rtx (V4SImode);
18302 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18303 /* Replicate the low element of the V4SImode vector. */
18304 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18305 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18306 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18307 return true;
18309 smode = HImode;
18310 wsmode = SImode;
18311 wvmode = V4SImode;
18312 goto widen;
18313 case V16QImode:
18314 if (TARGET_SSE2)
18316 rtx tmp1, tmp2;
18317 /* Extend QImode to SImode using a paradoxical SUBREG. */
18318 tmp1 = gen_reg_rtx (SImode);
18319 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18320 /* Insert the SImode value as low element of V4SImode vector. */
18321 tmp2 = gen_reg_rtx (V4SImode);
18322 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18323 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18324 CONST0_RTX (V4SImode),
18325 const1_rtx);
18326 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18327 /* Cast the V4SImode vector back to a V16QImode vector. */
18328 tmp1 = gen_reg_rtx (V16QImode);
18329 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18330 /* Duplicate the low byte through the whole low SImode word. */
18331 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18332 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18333 /* Cast the V16QImode vector back to a V4SImode vector. */
18334 tmp2 = gen_reg_rtx (V4SImode);
18335 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18336 /* Replicate the low element of the V4SImode vector. */
18337 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18338 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18339 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18340 return true;
18342 smode = QImode;
18343 wsmode = HImode;
18344 wvmode = V8HImode;
18345 goto widen;
18346 widen:
18347 /* Replicate the value once into the next wider mode and recurse. */
18348 val = convert_modes (wsmode, smode, val, true);
18349 x = expand_simple_binop (wsmode, ASHIFT, val,
18350 GEN_INT (GET_MODE_BITSIZE (smode)),
18351 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18352 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18354 x = gen_reg_rtx (wvmode);
18355 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18356 gcc_unreachable ();
18357 emit_move_insn (target, gen_lowpart (mode, x));
18358 return true;
18360 default:
18361 return false;
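/* The "widen" case above is the usual shift-and-or replication: the scalar
   is doubled into the next wider integer mode and the wider vector mode
   finishes the broadcast.  A minimal scalar sketch for a byte going into a
   32-bit word (the helper name is illustrative only):

     unsigned int
     broadcast_byte (unsigned char b)
     {
       unsigned int x = b;
       x |= x << 8;
       x |= x << 16;
       return x;
     }

   Each recursion level of the expander performs one such doubling and then
   recurses with the wider vector mode.  */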
18365 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18366 whose ONE_VAR element is VAR, and other elements are zero. Return true
18367 if successful. */
18369 static bool
18370 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18371 rtx target, rtx var, int one_var)
18373 enum machine_mode vsimode;
18374 rtx new_target;
18375 rtx x, tmp;
18377 switch (mode)
18379 case V2SFmode:
18380 case V2SImode:
18381 if (!mmx_ok)
18382 return false;
18383 /* FALLTHRU */
18385 case V2DFmode:
18386 case V2DImode:
18387 if (one_var != 0)
18388 return false;
18389 var = force_reg (GET_MODE_INNER (mode), var);
18390 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18391 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18392 return true;
18394 case V4SFmode:
18395 case V4SImode:
18396 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18397 new_target = gen_reg_rtx (mode);
18398 else
18399 new_target = target;
18400 var = force_reg (GET_MODE_INNER (mode), var);
18401 x = gen_rtx_VEC_DUPLICATE (mode, var);
18402 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18403 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18404 if (one_var != 0)
18406 /* We need to shuffle the value to the correct position, so
18407 create a new pseudo to store the intermediate result. */
18409 /* With SSE2, we can use the integer shuffle insns. */
18410 if (mode != V4SFmode && TARGET_SSE2)
18412 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18413 GEN_INT (1),
18414 GEN_INT (one_var == 1 ? 0 : 1),
18415 GEN_INT (one_var == 2 ? 0 : 1),
18416 GEN_INT (one_var == 3 ? 0 : 1)));
18417 if (target != new_target)
18418 emit_move_insn (target, new_target);
18419 return true;
18422 /* Otherwise convert the intermediate result to V4SFmode and
18423 use the SSE1 shuffle instructions. */
18424 if (mode != V4SFmode)
18426 tmp = gen_reg_rtx (V4SFmode);
18427 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18429 else
18430 tmp = new_target;
18432 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18433 GEN_INT (1),
18434 GEN_INT (one_var == 1 ? 0 : 1),
18435 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18436 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18438 if (mode != V4SFmode)
18439 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18440 else if (tmp != target)
18441 emit_move_insn (target, tmp);
18443 else if (target != new_target)
18444 emit_move_insn (target, new_target);
18445 return true;
18447 case V8HImode:
18448 case V16QImode:
18449 vsimode = V4SImode;
18450 goto widen;
18451 case V4HImode:
18452 case V8QImode:
18453 if (!mmx_ok)
18454 return false;
18455 vsimode = V2SImode;
18456 goto widen;
18457 widen:
18458 if (one_var != 0)
18459 return false;
18461 /* Zero extend the variable element to SImode and recurse. */
18462 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18464 x = gen_reg_rtx (vsimode);
18465 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18466 var, one_var))
18467 gcc_unreachable ();
18469 emit_move_insn (target, gen_lowpart (mode, x));
18470 return true;
18472 default:
18473 return false;
18477 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18478 consisting of the values in VALS. It is known that all elements
18479 except ONE_VAR are constants. Return true if successful. */
18481 static bool
18482 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18483 rtx target, rtx vals, int one_var)
18485 rtx var = XVECEXP (vals, 0, one_var);
18486 enum machine_mode wmode;
18487 rtx const_vec, x;
18489 const_vec = copy_rtx (vals);
18490 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18491 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18493 switch (mode)
18495 case V2DFmode:
18496 case V2DImode:
18497 case V2SFmode:
18498 case V2SImode:
18499 /* For the two element vectors, it's just as easy to use
18500 the general case. */
18501 return false;
18503 case V4SFmode:
18504 case V4SImode:
18505 case V8HImode:
18506 case V4HImode:
18507 break;
18509 case V16QImode:
18510 wmode = V8HImode;
18511 goto widen;
18512 case V8QImode:
18513 wmode = V4HImode;
18514 goto widen;
18515 widen:
18516 /* There's no way to set one QImode entry easily. Combine
18517 the variable value with its adjacent constant value, and
18518 promote to an HImode set. */
18519 x = XVECEXP (vals, 0, one_var ^ 1);
18520 if (one_var & 1)
18522 var = convert_modes (HImode, QImode, var, true);
18523 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18525 x = GEN_INT (INTVAL (x) & 0xff);
18527 else
18529 var = convert_modes (HImode, QImode, var, true);
18530 x = gen_int_mode (INTVAL (x) << 8, HImode);
18532 if (x != const0_rtx)
18533 var = expand_simple_binop (HImode, IOR, var, x, var,
18534 1, OPTAB_LIB_WIDEN);
18536 x = gen_reg_rtx (wmode);
18537 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18538 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18540 emit_move_insn (target, gen_lowpart (mode, x));
18541 return true;
18543 default:
18544 return false;
18547 emit_move_insn (target, const_vec);
18548 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18549 return true;
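/* A minimal scalar sketch of the QImode promotion used in the "widen" case
   above (little-endian lane order; the helper name is illustrative only):

     unsigned short
     combine_byte_pair (unsigned char var, unsigned char adj, int var_is_high)
     {
       if (var_is_high)
         return (unsigned short) ((var << 8) | adj);
       return (unsigned short) ((adj << 8) | var);
     }

   The combined half-word is then written with a single HImode
   ix86_expand_vector_set at element one_var >> 1.  */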
18552 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18553 all values variable, and none identical. */
18555 static void
18556 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18557 rtx target, rtx vals)
18559 enum machine_mode half_mode = GET_MODE_INNER (mode);
18560 rtx op0 = NULL, op1 = NULL;
18561 bool use_vec_concat = false;
18563 switch (mode)
18565 case V2SFmode:
18566 case V2SImode:
18567 if (!mmx_ok && !TARGET_SSE)
18568 break;
18569 /* FALLTHRU */
18571 case V2DFmode:
18572 case V2DImode:
18573 /* For the two element vectors, we always implement VEC_CONCAT. */
18574 op0 = XVECEXP (vals, 0, 0);
18575 op1 = XVECEXP (vals, 0, 1);
18576 use_vec_concat = true;
18577 break;
18579 case V4SFmode:
18580 half_mode = V2SFmode;
18581 goto half;
18582 case V4SImode:
18583 half_mode = V2SImode;
18584 goto half;
18585 half:
18587 rtvec v;
18589 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18590 Recurse to load the two halves. */
18592 op0 = gen_reg_rtx (half_mode);
18593 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18594 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18596 op1 = gen_reg_rtx (half_mode);
18597 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18598 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18600 use_vec_concat = true;
18602 break;
18604 case V8HImode:
18605 case V16QImode:
18606 case V4HImode:
18607 case V8QImode:
18608 break;
18610 default:
18611 gcc_unreachable ();
18614 if (use_vec_concat)
18616 if (!register_operand (op0, half_mode))
18617 op0 = force_reg (half_mode, op0);
18618 if (!register_operand (op1, half_mode))
18619 op1 = force_reg (half_mode, op1);
18621 emit_insn (gen_rtx_SET (VOIDmode, target,
18622 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18624 else
18626 int i, j, n_elts, n_words, n_elt_per_word;
18627 enum machine_mode inner_mode;
18628 rtx words[4], shift;
18630 inner_mode = GET_MODE_INNER (mode);
18631 n_elts = GET_MODE_NUNITS (mode);
18632 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18633 n_elt_per_word = n_elts / n_words;
18634 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18636 for (i = 0; i < n_words; ++i)
18638 rtx word = NULL_RTX;
18640 for (j = 0; j < n_elt_per_word; ++j)
18642 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18643 elt = convert_modes (word_mode, inner_mode, elt, true);
18645 if (j == 0)
18646 word = elt;
18647 else
18649 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18650 word, 1, OPTAB_LIB_WIDEN);
18651 word = expand_simple_binop (word_mode, IOR, word, elt,
18652 word, 1, OPTAB_LIB_WIDEN);
18656 words[i] = word;
18659 if (n_words == 1)
18660 emit_move_insn (target, gen_lowpart (mode, words[0]));
18661 else if (n_words == 2)
18663 rtx tmp = gen_reg_rtx (mode);
18664 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18665 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18666 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18667 emit_move_insn (target, tmp);
18669 else if (n_words == 4)
18671 rtx tmp = gen_reg_rtx (V4SImode);
18672 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18673 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18674 emit_move_insn (target, gen_lowpart (mode, tmp));
18676 else
18677 gcc_unreachable ();
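/* The word-building loop above packs the vector elements into word-sized
   integers, highest-index element first so that element 0 ends up in the
   low bits.  A minimal scalar sketch for four HImode elements and a 64-bit
   word (the helper name is illustrative only):

     unsigned long long
     pack_four_halfwords (const unsigned short e[4])
     {
       unsigned long long word = 0;
       int j;
       for (j = 3; j >= 0; j--)
         word = (word << 16) | e[j];
       return word;
     }

   The resulting words are moved into the low and high halves of the vector,
   or combined recursively through a V4SImode build when there are four.  */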
18681 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18682 instructions unless MMX_OK is true. */
18684 void
18685 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18687 enum machine_mode mode = GET_MODE (target);
18688 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18689 int n_elts = GET_MODE_NUNITS (mode);
18690 int n_var = 0, one_var = -1;
18691 bool all_same = true, all_const_zero = true;
18692 int i;
18693 rtx x;
18695 for (i = 0; i < n_elts; ++i)
18697 x = XVECEXP (vals, 0, i);
18698 if (!CONSTANT_P (x))
18699 n_var++, one_var = i;
18700 else if (x != CONST0_RTX (inner_mode))
18701 all_const_zero = false;
18702 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18703 all_same = false;
18706 /* Constants are best loaded from the constant pool. */
18707 if (n_var == 0)
18709 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18710 return;
18713 /* If all values are identical, broadcast the value. */
18714 if (all_same
18715 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18716 XVECEXP (vals, 0, 0)))
18717 return;
18719 /* Values where only one field is non-constant are best loaded from
18720 the pool and overwritten via move later. */
18721 if (n_var == 1)
18723 if (all_const_zero
18724 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18725 XVECEXP (vals, 0, one_var),
18726 one_var))
18727 return;
18729 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18730 return;
18733 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18736 void
18737 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18739 enum machine_mode mode = GET_MODE (target);
18740 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18741 bool use_vec_merge = false;
18742 rtx tmp;
18744 switch (mode)
18746 case V2SFmode:
18747 case V2SImode:
18748 if (mmx_ok)
18750 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18751 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18752 if (elt == 0)
18753 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18754 else
18755 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18756 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18757 return;
18759 break;
18761 case V2DFmode:
18762 case V2DImode:
18764 rtx op0, op1;
18766 /* For the two element vectors, we implement a VEC_CONCAT with
18767 the extraction of the other element. */
18769 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18770 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18772 if (elt == 0)
18773 op0 = val, op1 = tmp;
18774 else
18775 op0 = tmp, op1 = val;
18777 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18778 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18780 return;
18782 case V4SFmode:
18783 switch (elt)
18785 case 0:
18786 use_vec_merge = true;
18787 break;
18789 case 1:
18790 /* tmp = target = A B C D */
18791 tmp = copy_to_reg (target);
18792 /* target = A A B B */
18793 emit_insn (gen_sse_unpcklps (target, target, target));
18794 /* target = X A B B */
18795 ix86_expand_vector_set (false, target, val, 0);
18796 /* target = A X C D */
18797 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18798 GEN_INT (1), GEN_INT (0),
18799 GEN_INT (2+4), GEN_INT (3+4)));
18800 return;
18802 case 2:
18803 /* tmp = target = A B C D */
18804 tmp = copy_to_reg (target);
18805 /* tmp = X B C D */
18806 ix86_expand_vector_set (false, tmp, val, 0);
18807 /* target = A B X D */
18808 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18809 GEN_INT (0), GEN_INT (1),
18810 GEN_INT (0+4), GEN_INT (3+4)));
18811 return;
18813 case 3:
18814 /* tmp = target = A B C D */
18815 tmp = copy_to_reg (target);
18816 /* tmp = X B C D */
18817 ix86_expand_vector_set (false, tmp, val, 0);
18818 /* target = A B C X */
18819 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18820 GEN_INT (0), GEN_INT (1),
18821 GEN_INT (2+4), GEN_INT (0+4)));
18822 return;
18824 default:
18825 gcc_unreachable ();
18827 break;
18829 case V4SImode:
18830 /* Element 0 handled by vec_merge below. */
18831 if (elt == 0)
18833 use_vec_merge = true;
18834 break;
18837 if (TARGET_SSE2)
18839 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18840 store into element 0, then shuffle them back. */
18842 rtx order[4];
18844 order[0] = GEN_INT (elt);
18845 order[1] = const1_rtx;
18846 order[2] = const2_rtx;
18847 order[3] = GEN_INT (3);
18848 order[elt] = const0_rtx;
18850 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18851 order[1], order[2], order[3]));
18853 ix86_expand_vector_set (false, target, val, 0);
18855 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18856 order[1], order[2], order[3]));
18858 else
18860 /* For SSE1, we have to reuse the V4SF code. */
18861 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18862 gen_lowpart (SFmode, val), elt);
18864 return;
18866 case V8HImode:
18867 use_vec_merge = TARGET_SSE2;
18868 break;
18869 case V4HImode:
18870 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18871 break;
18873 case V16QImode:
18874 case V8QImode:
18875 default:
18876 break;
18879 if (use_vec_merge)
18881 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18882 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18883 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18885 else
18887 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18889 emit_move_insn (mem, target);
18891 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18892 emit_move_insn (tmp, val);
18894 emit_move_insn (target, mem);
18898 void
18899 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18901 enum machine_mode mode = GET_MODE (vec);
18902 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18903 bool use_vec_extr = false;
18904 rtx tmp;
18906 switch (mode)
18908 case V2SImode:
18909 case V2SFmode:
18910 if (!mmx_ok)
18911 break;
18912 /* FALLTHRU */
18914 case V2DFmode:
18915 case V2DImode:
18916 use_vec_extr = true;
18917 break;
18919 case V4SFmode:
18920 switch (elt)
18922 case 0:
18923 tmp = vec;
18924 break;
18926 case 1:
18927 case 3:
18928 tmp = gen_reg_rtx (mode);
18929 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18930 GEN_INT (elt), GEN_INT (elt),
18931 GEN_INT (elt+4), GEN_INT (elt+4)));
18932 break;
18934 case 2:
18935 tmp = gen_reg_rtx (mode);
18936 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18937 break;
18939 default:
18940 gcc_unreachable ();
18942 vec = tmp;
18943 use_vec_extr = true;
18944 elt = 0;
18945 break;
18947 case V4SImode:
18948 if (TARGET_SSE2)
18950 switch (elt)
18952 case 0:
18953 tmp = vec;
18954 break;
18956 case 1:
18957 case 3:
18958 tmp = gen_reg_rtx (mode);
18959 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18960 GEN_INT (elt), GEN_INT (elt),
18961 GEN_INT (elt), GEN_INT (elt)));
18962 break;
18964 case 2:
18965 tmp = gen_reg_rtx (mode);
18966 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18967 break;
18969 default:
18970 gcc_unreachable ();
18972 vec = tmp;
18973 use_vec_extr = true;
18974 elt = 0;
18976 else
18978 /* For SSE1, we have to reuse the V4SF code. */
18979 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18980 gen_lowpart (V4SFmode, vec), elt);
18981 return;
18983 break;
18985 case V8HImode:
18986 use_vec_extr = TARGET_SSE2;
18987 break;
18988 case V4HImode:
18989 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18990 break;
18992 case V16QImode:
18993 case V8QImode:
18994 /* ??? Could extract the appropriate HImode element and shift. */
18995 default:
18996 break;
18999 if (use_vec_extr)
19001 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19002 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19004 /* Let the rtl optimizers know about the zero extension performed. */
19005 if (inner_mode == HImode)
19007 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19008 target = gen_lowpart (SImode, target);
19011 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19013 else
19015 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19017 emit_move_insn (mem, vec);
19019 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19020 emit_move_insn (target, tmp);
19024 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19025 pattern to reduce; DEST is the destination; IN is the input vector. */
19027 void
19028 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19030 rtx tmp1, tmp2, tmp3;
19032 tmp1 = gen_reg_rtx (V4SFmode);
19033 tmp2 = gen_reg_rtx (V4SFmode);
19034 tmp3 = gen_reg_rtx (V4SFmode);
19036 emit_insn (gen_sse_movhlps (tmp1, in, in));
19037 emit_insn (fn (tmp2, tmp1, in));
19039 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19040 GEN_INT (1), GEN_INT (1),
19041 GEN_INT (1+4), GEN_INT (1+4)));
19042 emit_insn (fn (dest, tmp2, tmp3));
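/* A minimal scalar sketch of the reduction order used above, valid for a
   commutative FN such as addition (the helper name is illustrative only);
   lane 0 of DEST holds the reduced value afterwards:

     float
     reduce_v4sf (float (*fn) (float, float), const float v[4])
     {
       float t0 = fn (v[2], v[0]);
       float t1 = fn (v[3], v[1]);
       return fn (t0, t1);
     }
*/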
19045 /* Target hook for scalar_mode_supported_p. */
19046 static bool
19047 ix86_scalar_mode_supported_p (enum machine_mode mode)
19049 if (DECIMAL_FLOAT_MODE_P (mode))
19050 return true;
19051 else
19052 return default_scalar_mode_supported_p (mode);
19055 /* Implements target hook vector_mode_supported_p. */
19056 static bool
19057 ix86_vector_mode_supported_p (enum machine_mode mode)
19059 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19060 return true;
19061 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19062 return true;
19063 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19064 return true;
19065 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19066 return true;
19067 return false;
19070 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19072 We do this in the new i386 backend to maintain source compatibility
19073 with the old cc0-based compiler. */
19075 static tree
19076 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19077 tree inputs ATTRIBUTE_UNUSED,
19078 tree clobbers)
19080 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19081 clobbers);
19082 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19083 clobbers);
19084 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19085 clobbers);
19086 return clobbers;
19089 /* Return true if this goes in large data/bss. */
19091 static bool
19092 ix86_in_large_data_p (tree exp)
19094 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19095 return false;
19097 /* Functions are never large data. */
19098 if (TREE_CODE (exp) == FUNCTION_DECL)
19099 return false;
19101 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19103 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19104 if (strcmp (section, ".ldata") == 0
19105 || strcmp (section, ".lbss") == 0)
19106 return true;
19107 return false;
19109 else
19111 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19113 /* If this is an incomplete type with size 0, then we can't put it
19114 in data because it might be too big when completed. */
19115 if (!size || size > ix86_section_threshold)
19116 return true;
19119 return false;
19121 static void
19122 ix86_encode_section_info (tree decl, rtx rtl, int first)
19124 default_encode_section_info (decl, rtl, first);
19126 if (TREE_CODE (decl) == VAR_DECL
19127 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19128 && ix86_in_large_data_p (decl))
19129 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19132 /* Worker function for REVERSE_CONDITION. */
19134 enum rtx_code
19135 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19137 return (mode != CCFPmode && mode != CCFPUmode
19138 ? reverse_condition (code)
19139 : reverse_condition_maybe_unordered (code));
19142 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19143 to OPERANDS[0]. */
19145 const char *
19146 output_387_reg_move (rtx insn, rtx *operands)
19148 if (REG_P (operands[1])
19149 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19151 if (REGNO (operands[0]) == FIRST_STACK_REG)
19152 return output_387_ffreep (operands, 0);
19153 return "fstp\t%y0";
19155 if (STACK_TOP_P (operands[0]))
19156 return "fld%z1\t%y1";
19157 return "fst\t%y0";
19160 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
19161 FP status register is set. */
19163 void
19164 ix86_emit_fp_unordered_jump (rtx label)
19166 rtx reg = gen_reg_rtx (HImode);
19167 rtx temp;
19169 emit_insn (gen_x86_fnstsw_1 (reg));
19171 if (TARGET_USE_SAHF)
19173 emit_insn (gen_x86_sahf_1 (reg));
19175 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19176 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19178 else
19180 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19182 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19183 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19186 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19187 gen_rtx_LABEL_REF (VOIDmode, label),
19188 pc_rtx);
19189 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19190 emit_jump_insn (temp);
19193 /* Output code to perform a log1p XFmode calculation. */
19195 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19197 rtx label1 = gen_label_rtx ();
19198 rtx label2 = gen_label_rtx ();
19200 rtx tmp = gen_reg_rtx (XFmode);
19201 rtx tmp2 = gen_reg_rtx (XFmode);
19203 emit_insn (gen_absxf2 (tmp, op1));
19204 emit_insn (gen_cmpxf (tmp,
19205 CONST_DOUBLE_FROM_REAL_VALUE (
19206 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19207 XFmode)));
19208 emit_jump_insn (gen_bge (label1));
19210 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19211 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19212 emit_jump (label2);
19214 emit_label (label1);
19215 emit_move_insn (tmp, CONST1_RTX (XFmode));
19216 emit_insn (gen_addxf3 (tmp, op1, tmp));
19217 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19218 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19220 emit_label (label2);
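/* A minimal scalar sketch of the selection above.  fyl2xp1 is only
   specified for |x| up to 1 - sqrt(2)/2 (about 0.2929), so larger
   arguments take the plain fyl2x path.  log2_1p() below is a stand-in for
   the log2 (x + 1) computed by fyl2xp1 and is not a real function:

     long double
     log1p_sketch (long double x)
     {
       const long double ln2 = 0.6931471805599453094L;
       if (__builtin_fabsl (x) < 0.29289321881345247561810596348408353L)
         return ln2 * log2_1p (x);
       return ln2 * log2l (1.0L + x);
     }
*/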
19223 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19225 static void
19226 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19227 tree decl)
19229 /* With Binutils 2.15, the "@unwind" marker must be specified on
19230 every occurrence of the ".eh_frame" section, not just the first
19231 one. */
19232 if (TARGET_64BIT
19233 && strcmp (name, ".eh_frame") == 0)
19235 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19236 flags & SECTION_WRITE ? "aw" : "a");
19237 return;
19239 default_elf_asm_named_section (name, flags, decl);
19242 /* Return the mangling of TYPE if it is an extended fundamental type. */
19244 static const char *
19245 ix86_mangle_fundamental_type (tree type)
19247 switch (TYPE_MODE (type))
19249 case TFmode:
19250 /* __float128 is "g". */
19251 return "g";
19252 case XFmode:
19253 /* "long double" or __float80 is "e". */
19254 return "e";
19255 default:
19256 return NULL;
19260 /* For 32-bit code we can save PIC register setup by using the
19261 __stack_chk_fail_local hidden function instead of calling
19262 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
19263 register, so it is better to call __stack_chk_fail directly. */
19265 static tree
19266 ix86_stack_protect_fail (void)
19268 return TARGET_64BIT
19269 ? default_external_stack_protect_fail ()
19270 : default_hidden_stack_protect_fail ();
19273 /* Select a format to encode pointers in exception handling data. CODE
19274 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19275 true if the symbol may be affected by dynamic relocations.
19277 ??? All x86 object file formats are capable of representing this.
19278 After all, the relocation needed is the same as for the call insn.
19279 Whether or not a particular assembler allows us to enter such, I
19280 guess we'll have to see. */
19282 asm_preferred_eh_data_format (int code, int global)
19284 if (flag_pic)
19286 int type = DW_EH_PE_sdata8;
19287 if (!TARGET_64BIT
19288 || ix86_cmodel == CM_SMALL_PIC
19289 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19290 type = DW_EH_PE_sdata4;
19291 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19293 if (ix86_cmodel == CM_SMALL
19294 || (ix86_cmodel == CM_MEDIUM && code))
19295 return DW_EH_PE_udata4;
19296 return DW_EH_PE_absptr;
19299 /* Expand copysign from SIGN to the positive value ABS_VALUE
19300 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19301 the sign-bit. */
19302 static void
19303 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19305 enum machine_mode mode = GET_MODE (sign);
19306 rtx sgn = gen_reg_rtx (mode);
19307 if (mask == NULL_RTX)
19309 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19310 if (!VECTOR_MODE_P (mode))
19312 /* We need to generate a scalar mode mask in this case. */
19313 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19314 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19315 mask = gen_reg_rtx (mode);
19316 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19319 else
19320 mask = gen_rtx_NOT (mode, mask);
19321 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19322 gen_rtx_AND (mode, mask, sign)));
19323 emit_insn (gen_rtx_SET (VOIDmode, result,
19324 gen_rtx_IOR (mode, abs_value, sgn)));
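/* A minimal scalar sketch of the mask arithmetic above for DFmode,
   assuming ABS_VALUE already has its sign bit cleared (the helper name
   and the integer view of the double are illustrative only):

     double
     copysign_to_positive (double abs_value, double sign)
     {
       const unsigned long long sign_bit = 0x8000000000000000ULL;
       unsigned long long a, s;
       __builtin_memcpy (&a, &abs_value, sizeof a);
       __builtin_memcpy (&s, &sign, sizeof s);
       a |= s & sign_bit;
       __builtin_memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }
*/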
19327 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19328 mask for masking out the sign-bit is stored in *SMASK, if that is
19329 non-null. */
19330 static rtx
19331 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19333 enum machine_mode mode = GET_MODE (op0);
19334 rtx xa, mask;
19336 xa = gen_reg_rtx (mode);
19337 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19338 if (!VECTOR_MODE_P (mode))
19340 /* We need to generate a scalar mode mask in this case. */
19341 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19342 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19343 mask = gen_reg_rtx (mode);
19344 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19346 emit_insn (gen_rtx_SET (VOIDmode, xa,
19347 gen_rtx_AND (mode, op0, mask)));
19349 if (smask)
19350 *smask = mask;
19352 return xa;
19355 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19356 swapping the operands if SWAP_OPERANDS is true. The expanded
19357 code is a forward jump to a newly created label in case the
19358 comparison is true. The generated label rtx is returned. */
19359 static rtx
19360 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19361 bool swap_operands)
19363 rtx label, tmp;
19365 if (swap_operands)
19367 tmp = op0;
19368 op0 = op1;
19369 op1 = tmp;
19372 label = gen_label_rtx ();
19373 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19374 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19375 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19376 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19377 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19378 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19379 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19380 JUMP_LABEL (tmp) = label;
19382 return label;
19385 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19386 using comparison code CODE. Operands are swapped for the comparison if
19387 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19388 static rtx
19389 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19390 bool swap_operands)
19392 enum machine_mode mode = GET_MODE (op0);
19393 rtx mask = gen_reg_rtx (mode);
19395 if (swap_operands)
19397 rtx tmp = op0;
19398 op0 = op1;
19399 op1 = tmp;
19402 if (mode == DFmode)
19403 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19404 gen_rtx_fmt_ee (code, mode, op0, op1)));
19405 else
19406 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19407 gen_rtx_fmt_ee (code, mode, op0, op1)));
19409 return mask;
19412 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19413 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19414 static rtx
19415 ix86_gen_TWO52 (enum machine_mode mode)
19417 REAL_VALUE_TYPE TWO52r;
19418 rtx TWO52;
19420 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19421 TWO52 = const_double_from_real_value (TWO52r, mode);
19422 TWO52 = force_reg (mode, TWO52);
19424 return TWO52;
19427 /* Expand SSE sequence for computing lround from OP1 storing
19428 into OP0. */
19429 void
19430 ix86_expand_lround (rtx op0, rtx op1)
19432 /* C code for the stuff we're doing below:
19433 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19434 return (long)tmp;
19436 enum machine_mode mode = GET_MODE (op1);
19437 const struct real_format *fmt;
19438 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19439 rtx adj;
19441 /* load nextafter (0.5, 0.0) */
19442 fmt = REAL_MODE_FORMAT (mode);
19443 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19444 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19446 /* adj = copysign (0.5, op1) */
19447 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19448 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19450 /* adj = op1 + adj */
19451 expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
19453 /* op0 = (imode)adj */
19454 expand_fix (op0, adj, 0);
19457 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
19458 into OPERAND0. */
19459 void
19460 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19462 /* C code for the stuff we're doing below (for do_floor):
19463 xi = (long)op1;
19464 xi -= (double)xi > op1 ? 1 : 0;
19465 return xi;
19467 enum machine_mode fmode = GET_MODE (op1);
19468 enum machine_mode imode = GET_MODE (op0);
19469 rtx ireg, freg, label;
19471 /* reg = (long)op1 */
19472 ireg = gen_reg_rtx (imode);
19473 expand_fix (ireg, op1, 0);
19475 /* freg = (double)reg */
19476 freg = gen_reg_rtx (fmode);
19477 expand_float (freg, ireg, 0);
19479 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19480 label = ix86_expand_sse_compare_and_jump (UNLE,
19481 freg, op1, !do_floor);
19482 expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19483 ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
19484 emit_label (label);
19485 LABEL_NUSES (label) = 1;
19487 emit_move_insn (op0, ireg);
19490 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19491 result in OPERAND0. */
19492 void
19493 ix86_expand_rint (rtx operand0, rtx operand1)
19495 /* C code for the stuff we're doing below:
19496 xa = fabs (operand1);
19497 if (!isless (xa, 2**52))
19498 return operand1;
19499 xa = xa + 2**52 - 2**52;
19500 return copysign (xa, operand1);
19502 enum machine_mode mode = GET_MODE (operand0);
19503 rtx res, xa, label, TWO52, mask;
19505 res = gen_reg_rtx (mode);
19506 emit_move_insn (res, operand1);
19508 /* xa = abs (operand1) */
19509 xa = ix86_expand_sse_fabs (res, &mask);
19511 /* if (!isless (xa, TWO52)) goto label; */
19512 TWO52 = ix86_gen_TWO52 (mode);
19513 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19515 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19516 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19518 ix86_sse_copysign_to_positive (res, xa, res, mask);
19520 emit_label (label);
19521 LABEL_NUSES (label) = 1;
19523 emit_move_insn (operand0, res);
19526 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19527 into OPERAND0. */
19528 void
19529 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19531 /* C code for the stuff we expand below.
19532 double xa = fabs (x), x2;
19533 if (!isless (xa, TWO52))
19534 return x;
19535 xa = xa + TWO52 - TWO52;
19536 x2 = copysign (xa, x);
19537 Compensate. Floor:
19538 if (x2 > x)
19539 x2 -= 1;
19540 Compensate. Ceil:
19541 if (x2 < x)
19542 x2 -= -1;
19543 return x2;
19545 enum machine_mode mode = GET_MODE (operand0);
19546 rtx xa, TWO52, tmp, label, one, res, mask;
19548 TWO52 = ix86_gen_TWO52 (mode);
19550 /* Temporary for holding the result, initialized to the input
19551 operand to ease control flow. */
19552 res = gen_reg_rtx (mode);
19553 emit_move_insn (res, operand1);
19555 /* xa = abs (operand1) */
19556 xa = ix86_expand_sse_fabs (res, &mask);
19558 /* if (!isless (xa, TWO52)) goto label; */
19559 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19561 /* xa = xa + TWO52 - TWO52; */
19562 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19563 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19565 /* xa = copysign (xa, operand1) */
19566 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19568 /* generate 1.0 or -1.0 */
19569 one = force_reg (mode,
19570 const_double_from_real_value (do_floor
19571 ? dconst1 : dconstm1, mode));
19573 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19574 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19575 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19576 gen_rtx_AND (mode, one, tmp)));
19577 /* We always need to subtract here to preserve signed zero. */
19578 expand_simple_binop (mode, MINUS,
19579 xa, tmp, res, 0, OPTAB_DIRECT);
19581 emit_label (label);
19582 LABEL_NUSES (label) = 1;
19584 emit_move_insn (operand0, res);
19587 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19588 into OPERAND0. */
19589 void
19590 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19592 /* C code for the stuff we expand below.
19593 double xa = fabs (x), x2;
19594 if (!isless (xa, TWO52))
19595 return x;
19596 x2 = (double)(long)x;
19597 Compensate. Floor:
19598 if (x2 > x)
19599 x2 -= 1;
19600 Compensate. Ceil:
19601 if (x2 < x)
19602 x2 += 1;
19603 if (HONOR_SIGNED_ZEROS (mode))
19604 return copysign (x2, x);
19605 return x2;
19607 enum machine_mode mode = GET_MODE (operand0);
19608 rtx xa, xi, TWO52, tmp, label, one, res, mask;
19610 TWO52 = ix86_gen_TWO52 (mode);
19612 /* Temporary for holding the result, initialized to the input
19613 operand to ease control flow. */
19614 res = gen_reg_rtx (mode);
19615 emit_move_insn (res, operand1);
19617 /* xa = abs (operand1) */
19618 xa = ix86_expand_sse_fabs (res, &mask);
19620 /* if (!isless (xa, TWO52)) goto label; */
19621 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19623 /* xa = (double)(long)x */
19624 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19625 expand_fix (xi, res, 0);
19626 expand_float (xa, xi, 0);
19628 /* generate 1.0 */
19629 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19631 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19632 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19633 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19634 gen_rtx_AND (mode, one, tmp)));
19635 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19636 xa, tmp, res, 0, OPTAB_DIRECT);
19638 if (HONOR_SIGNED_ZEROS (mode))
19639 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19641 emit_label (label);
19642 LABEL_NUSES (label) = 1;
19644 emit_move_insn (operand0, res);
19647 /* Expand SSE sequence for computing round from OPERAND1 storing
19648 into OPERAND0. Sequence that works without relying on DImode truncation
19649 via cvttsd2siq that is only available on 64bit targets. */
19650 void
19651 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19653 /* C code for the stuff we expand below.
19654 double xa = fabs (x), xa2, x2;
19655 if (!isless (xa, TWO52))
19656 return x;
19657 Using the absolute value and copying back sign makes
19658 -0.0 -> -0.0 correct.
19659 xa2 = xa + TWO52 - TWO52;
19660 Compensate.
19661 dxa = xa2 - xa;
19662 if (dxa <= -0.5)
19663 xa2 += 1;
19664 else if (dxa > 0.5)
19665 xa2 -= 1;
19666 x2 = copysign (xa2, x);
19667 return x2;
19669 enum machine_mode mode = GET_MODE (operand0);
19670 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19672 TWO52 = ix86_gen_TWO52 (mode);
19674 /* Temporary for holding the result, initialized to the input
19675 operand to ease control flow. */
19676 res = gen_reg_rtx (mode);
19677 emit_move_insn (res, operand1);
19679 /* xa = abs (operand1) */
19680 xa = ix86_expand_sse_fabs (res, &mask);
19682 /* if (!isless (xa, TWO52)) goto label; */
19683 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19685 /* xa2 = xa + TWO52 - TWO52; */
19686 xa2 = gen_reg_rtx (mode);
19687 expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
19688 expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19690 /* dxa = xa2 - xa; */
19691 dxa = gen_reg_rtx (mode);
19692 expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);
19694 /* generate 0.5, 1.0 and -0.5 */
19695 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19696 one = gen_reg_rtx (mode);
19697 expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
19698 mhalf = gen_reg_rtx (mode);
19699 expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);
19701 /* Compensate. */
19702 tmp = gen_reg_rtx (mode);
19703 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19704 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19705 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19706 gen_rtx_AND (mode, one, tmp)));
19707 expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19708 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19709 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19710 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19711 gen_rtx_AND (mode, one, tmp)));
19712 expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19714 /* res = copysign (xa2, operand1) */
19715 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19717 emit_label (label);
19718 LABEL_NUSES (label) = 1;
19720 emit_move_insn (operand0, res);
19723 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19724 into OPERAND0. */
19725 void
19726 ix86_expand_trunc (rtx operand0, rtx operand1)
19728 /* C code for SSE variant we expand below.
19729 double xa = fabs (x), x2;
19730 if (!isless (xa, TWO52))
19731 return x;
19732 x2 = (double)(long)x;
19733 if (HONOR_SIGNED_ZEROS (mode))
19734 return copysign (x2, x);
19735 return x2;
19737 enum machine_mode mode = GET_MODE (operand0);
19738 rtx xa, xi, TWO52, label, res, mask;
19740 TWO52 = ix86_gen_TWO52 (mode);
19742 /* Temporary for holding the result, initialized to the input
19743 operand to ease control flow. */
19744 res = gen_reg_rtx (mode);
19745 emit_move_insn (res, operand1);
19747 /* xa = abs (operand1) */
19748 xa = ix86_expand_sse_fabs (res, &mask);
19750 /* if (!isless (xa, TWO52)) goto label; */
19751 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19753 /* x = (double)(long)x */
19754 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19755 expand_fix (xi, res, 0);
19756 expand_float (res, xi, 0);
19758 if (HONOR_SIGNED_ZEROS (mode))
19759 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19761 emit_label (label);
19762 LABEL_NUSES (label) = 1;
19764 emit_move_insn (operand0, res);
19767 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
19768 OPERAND0; this variant avoids DImode truncation, so it also handles DFmode on 32-bit targets. */
19769 void
19770 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19772 enum machine_mode mode = GET_MODE (operand0);
19773 rtx xa, mask, TWO52, label, one, res, smask;
19775 /* C code for SSE variant we expand below.
19776 double xa = fabs (x), x2;
19777 if (!isless (xa, TWO52))
19778 return x;
19779 xa2 = xa + TWO52 - TWO52;
19780 Compensate:
19781 if (xa2 > xa)
19782 xa2 -= 1.0;
19783 x2 = copysign (xa2, x);
19784 return x2;
19787 TWO52 = ix86_gen_TWO52 (mode);
19789 /* Temporary for holding the result, initialized to the input
19790 operand to ease control flow. */
19791 res = gen_reg_rtx (mode);
19792 emit_move_insn (res, operand1);
19794 /* xa = abs (operand1) */
19795 xa = ix86_expand_sse_fabs (res, &smask);
19797 /* if (!isless (xa, TWO52)) goto label; */
19798 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19800 /* res = xa + TWO52 - TWO52; */
19801 expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
19802 expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
19804 /* generate 1.0 */
19805 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19807 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19808 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19809 emit_insn (gen_rtx_SET (VOIDmode, mask,
19810 gen_rtx_AND (mode, mask, one)));
19811 expand_simple_binop (mode, MINUS,
19812 res, mask, res, 0, OPTAB_DIRECT);
19814 /* res = copysign (res, operand1) */
19815 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
19817 emit_label (label);
19818 LABEL_NUSES (label) = 1;
19820 emit_move_insn (operand0, res);
19823 /* Expand SSE sequence for computing round from OPERAND1 storing
19824 into OPERAND0. */
19825 void
19826 ix86_expand_round (rtx operand0, rtx operand1)
19828 /* C code for the stuff we're doing below:
19829 double xa = fabs (x);
19830 if (!isless (xa, TWO52))
19831 return x;
19832 xa = (double)(long)(xa + nextafter (0.5, 0.0));
19833 return copysign (xa, x);
19835 enum machine_mode mode = GET_MODE (operand0);
19836 rtx res, TWO52, xa, label, xi, half, mask;
19837 const struct real_format *fmt;
19838 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19840 /* Temporary for holding the result, initialized to the input
19841 operand to ease control flow. */
19842 res = gen_reg_rtx (mode);
19843 emit_move_insn (res, operand1);
19845 TWO52 = ix86_gen_TWO52 (mode);
19846 xa = ix86_expand_sse_fabs (res, &mask);
19847 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19849 /* load nextafter (0.5, 0.0) */
19850 fmt = REAL_MODE_FORMAT (mode);
19851 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19852 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19854 /* xa = xa + 0.5 */
19855 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
19856 expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);
19858 /* xa = (double)(int64_t)xa */
19859 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19860 expand_fix (xi, xa, 0);
19861 expand_float (xa, xi, 0);
19863 /* res = copysign (xa, operand1) */
19864 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
19866 emit_label (label);
19867 LABEL_NUSES (label) = 1;
19869 emit_move_insn (operand0, res);
19872 #include "gt-i386.h"