gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
56 #endif
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
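/* Illustrative sketch, not part of the original file: MODE_INDEX selects the
   per-mode entry of the multiply and divide arrays in struct processor_costs,
   roughly as in

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];
     cost = ix86_cost->divide[MODE_INDEX (mode)];

   The field names mult_init and divide are assumed from the processor_costs
   definition in i386.h.  */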
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
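/* Worked example, not part of the original file: under the assumption above,
   COSTS_N_INSNS (1) == 4 and a 2-byte addition gets COSTS_N_BYTES (2) == 4,
   so byte counts and instruction counts land on the same scale; a 3-byte
   instruction costs COSTS_N_BYTES (3) == 6, i.e. one and a half "adds".  */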
70 static const
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
89 0, /* "large" insn */
90 2, /* MOVE_RATIO */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
114 2, /* Branch cost */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 };
123 /* Processor costs (relative to an add) */
124 static const
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
144 3, /* MOVE_RATIO */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
168 1, /* Branch cost */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 };
177 static const
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
197 3, /* MOVE_RATIO */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
221 1, /* Branch cost */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
228 };
230 static const
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
250 6, /* MOVE_RATIO */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
274 2, /* Branch cost */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
281 };
283 static const
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
303 6, /* MOVE_RATIO */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
327 2, /* Branch cost */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
334 };
336 static const
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
356 4, /* MOVE_RATIO */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
380 1, /* Branch cost */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
387 };
389 static const
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
409 9, /* MOVE_RATIO */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
433 5, /* Branch cost */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
440 };
442 static const
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
462 9, /* MOVE_RATIO */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
486 5, /* Branch cost */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
493 };
495 static const
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
515 6, /* MOVE_RATIO */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
539 2, /* Branch cost */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
546 };
548 static const
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
568 17, /* MOVE_RATIO */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
592 1, /* Branch cost */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
599 };
601 /* Generic64 should produce code tuned for Nocona and K8. */
602 static const
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost, however, our current implementation of synth_mult results in
607 the use of unnecessary temporary registers, causing regressions on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 17, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
651 is increased to the perhaps more appropriate value of 5. */
652 3, /* Branch cost */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
659 };
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
662 static const
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
682 17, /* MOVE_RATIO */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
706 3, /* Branch cost */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
713 };
715 const struct processor_costs *ix86_cost = &pentium_cost;
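/* Illustrative note, not part of the original file: pentium_cost is only the
   initial default; override_options later re-points ix86_cost at the cost
   table of the CPU selected with -mtune, roughly as in

     ix86_cost = processor_target_table[ix86_tune].cost;

   using the processor_target_table defined further below.  */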
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
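/* Illustrative sketch, not part of the original file: each x86_* tuning
   variable below is a bitmask over the processor masks above and is tested
   against the currently selected tuning CPU, roughly as i386.h does with

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so -mtune=k8 (ix86_tune == PROCESSOR_K8) enables TARGET_USE_LEAVE because
   x86_use_leave includes m_ATHLON_K8.  The macro names are assumed from
   i386.h.  */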
732 /* Generic instruction choice should be a common subset of the supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
736 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
737 generic because it does not work well with PPro based chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_fisttp = m_NOCONA;
747 const int x86_3dnow_a = m_ATHLON_K8;
748 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
749 /* Branch hints were put in P4 based on simulation results. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. They also increase the code size. As a result,
752 icc never generates branch hints. */
753 const int x86_branch_hints = 0;
754 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on the Generic32
756 compilation setting as well. However, in the current implementation the
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and conflicts
761 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
763 const int x86_partial_reg_stall = m_PPRO;
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 x86_partial_reg_stall, this option might be considered for Generic32
778 if our scheme for avoiding partial stalls were more effective. */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
794 that treat 128bit SSE registers as single units and K8 based chips that
795 divide SSE registers into two 64bit halves.
796 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results show that disabling
799 this option on P4 brings over a 20% SPECfp regression, while enabling it on
800 K8 brings roughly a 2.4% regression that can be partly masked by careful
801 scheduling of moves. */
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just the
805 lower part of scalar values in the proper format, leaving the upper part
806 undefined. */
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers, which results in pretty abysmal code. */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for pentium. */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for nocona. */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
834 /* In case the average insn count for a single function invocation is
835 lower than this constant, emit fast (but longer) prologue and
836 epilogue code. */
837 #define FAST_PROLOGUE_INSN_COUNT 20
839 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
849 /* ax, dx, cx, bx */
850 AREG, DREG, CREG, BREG,
851 /* si, di, bp, sp */
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
853 /* FP registers */
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
856 /* arg pointer */
857 NON_Q_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
861 SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
863 MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
867 SSE_REGS, SSE_REGS,
870 /* The "default" register map used in 32bit mode. */
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
883 static int const x86_64_int_parameter_registers[6] =
884 {
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
887 };
889 static int const x86_64_int_return_registers[4] =
890 {
891 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
892 };
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
950 numbers.
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
969 };
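/* Worked example, not part of the original file: the table above encodes the
   mapping listed in the comment, e.g. gcc regno 7 (%esp) maps to DWARF
   register 4 (svr4_dbx_register_map[7] == 4) and gcc regno 4 (%esi) maps to
   DWARF register 6.  */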
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
974 rtx ix86_compare_op0 = NULL_RTX;
975 rtx ix86_compare_op1 = NULL_RTX;
976 rtx ix86_compare_emitted = NULL_RTX;
978 /* Size of the register save area. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
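/* Worked example, not part of the original file: with the usual x86-64
   values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8
   (assumed from i386.h), the register save area is 6*8 + 8*16 == 176 bytes:
   six integer argument registers plus eight 16-byte SSE argument
   registers.  */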
981 /* Define the structure for the machine field in struct function. */
983 struct stack_local_entry GTY(())
985 unsigned short mode;
986 unsigned short n;
987 rtx rtl;
988 struct stack_local_entry *next;
991 /* Structure describing stack frame layout.
992 Stack grows downward:
994 [arguments]
995 <- ARG_POINTER
996 saved pc
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1000 [saved regs]
1002 [padding1] \
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
1006 [frame] (
1008 [padding2] /
1010 struct ix86_frame
1012 int nregs;
1013 int padding1;
1014 int va_arg_size;
1015 HOST_WIDE_INT frame;
1016 int padding2;
1017 int outgoing_arguments_size;
1018 int red_zone_size;
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel;
1033 /* Asm dialect. */
1034 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1035 /* TLS dialects. */
1036 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath;
1041 /* Which cpu are we scheduling for. */
1042 enum processor_type ix86_tune;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch;
1046 /* True if the SSE prefetch instruction is not a NOOP. */
1047 int x86_prefetch_sse;
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm;
1052 /* Preferred alignment for stack boundary in bits. */
1053 unsigned int ix86_preferred_stack_boundary;
1055 /* Values 1-5: see jump.c */
1056 int ix86_branch_cost;
1058 /* Variables which are this size or smaller are put in the data/bss
1059 or ldata/lbss sections. */
1061 int ix86_section_threshold = 65536;
1063 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1064 char internal_label_prefix[16];
1065 int internal_label_prefix_len;
1067 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1068 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1070 static bool ix86_handle_option (size_t, const char *, int);
1071 static void output_pic_addr_const (FILE *, rtx, int);
1072 static void put_condition_code (enum rtx_code, enum machine_mode,
1073 int, int, FILE *);
1074 static const char *get_some_local_dynamic_name (void);
1075 static int get_some_local_dynamic_name_1 (rtx *, void *);
1076 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1077 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1078 rtx *);
1079 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1080 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1081 enum machine_mode);
1082 static rtx get_thread_pointer (int);
1083 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1084 static void get_pc_thunk_name (char [32], unsigned int);
1085 static rtx gen_push (rtx);
1086 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1087 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1088 static struct machine_function * ix86_init_machine_status (void);
1089 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1090 static int ix86_nsaved_regs (void);
1091 static void ix86_emit_save_regs (void);
1092 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1093 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1094 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1095 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1096 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1097 static rtx ix86_expand_aligntest (rtx, int);
1098 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1099 static int ix86_issue_rate (void);
1100 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1101 static int ia32_multipass_dfa_lookahead (void);
1102 static void ix86_init_mmx_sse_builtins (void);
1103 static void ix86_init_sse_abi_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1113 tree, int *, int);
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1126 tree, rtx);
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1128 tree, rtx);
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1158 tree, bool);
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1162 static const char *ix86_mangle_fundamental_type (tree);
1163 static tree ix86_stack_protect_fail (void);
1164 static rtx ix86_internal_arg_pointer (void);
1165 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1167 /* This function is only used on Solaris. */
1168 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1169 ATTRIBUTE_UNUSED;
1171 /* Register class used for passing a given 64bit part of the argument.
1172 These represent classes as documented by the PS ABI, with the exception
1173 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1174 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
1176 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1177 whenever possible (the upper half does contain padding). */
1179 enum x86_64_reg_class
1181 X86_64_NO_CLASS,
1182 X86_64_INTEGER_CLASS,
1183 X86_64_INTEGERSI_CLASS,
1184 X86_64_SSE_CLASS,
1185 X86_64_SSESF_CLASS,
1186 X86_64_SSEDF_CLASS,
1187 X86_64_SSEUP_CLASS,
1188 X86_64_X87_CLASS,
1189 X86_64_X87UP_CLASS,
1190 X86_64_COMPLEX_X87_CLASS,
1191 X86_64_MEMORY_CLASS
1193 static const char * const x86_64_reg_class_name[] = {
1194 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1195 "sseup", "x87", "x87up", "cplx87", "no"
1198 #define MAX_CLASSES 4
1200 /* Table of constants used by fldpi, fldln2, etc.... */
1201 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1202 static bool ext_80387_constants_init = 0;
1203 static void init_ext_80387_constants (void);
1204 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1205 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1206 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1207 static section *x86_64_elf_select_section (tree decl, int reloc,
1208 unsigned HOST_WIDE_INT align)
1209 ATTRIBUTE_UNUSED;
1211 /* Initialize the GCC target structure. */
1212 #undef TARGET_ATTRIBUTE_TABLE
1213 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1214 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1215 # undef TARGET_MERGE_DECL_ATTRIBUTES
1216 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1217 #endif
1219 #undef TARGET_COMP_TYPE_ATTRIBUTES
1220 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1222 #undef TARGET_INIT_BUILTINS
1223 #define TARGET_INIT_BUILTINS ix86_init_builtins
1224 #undef TARGET_EXPAND_BUILTIN
1225 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1227 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1229 #undef TARGET_ASM_FUNCTION_EPILOGUE
1230 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1232 #undef TARGET_ENCODE_SECTION_INFO
1233 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1234 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1235 #else
1236 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1237 #endif
1239 #undef TARGET_ASM_OPEN_PAREN
1240 #define TARGET_ASM_OPEN_PAREN ""
1241 #undef TARGET_ASM_CLOSE_PAREN
1242 #define TARGET_ASM_CLOSE_PAREN ""
1244 #undef TARGET_ASM_ALIGNED_HI_OP
1245 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1246 #undef TARGET_ASM_ALIGNED_SI_OP
1247 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1248 #ifdef ASM_QUAD
1249 #undef TARGET_ASM_ALIGNED_DI_OP
1250 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1251 #endif
1253 #undef TARGET_ASM_UNALIGNED_HI_OP
1254 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1255 #undef TARGET_ASM_UNALIGNED_SI_OP
1256 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1257 #undef TARGET_ASM_UNALIGNED_DI_OP
1258 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1260 #undef TARGET_SCHED_ADJUST_COST
1261 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1262 #undef TARGET_SCHED_ISSUE_RATE
1263 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1264 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1265 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1266 ia32_multipass_dfa_lookahead
1268 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1269 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1271 #ifdef HAVE_AS_TLS
1272 #undef TARGET_HAVE_TLS
1273 #define TARGET_HAVE_TLS true
1274 #endif
1275 #undef TARGET_CANNOT_FORCE_CONST_MEM
1276 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1277 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1278 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1280 #undef TARGET_DELEGITIMIZE_ADDRESS
1281 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1283 #undef TARGET_MS_BITFIELD_LAYOUT_P
1284 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1286 #if TARGET_MACHO
1287 #undef TARGET_BINDS_LOCAL_P
1288 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1289 #endif
1291 #undef TARGET_ASM_OUTPUT_MI_THUNK
1292 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1293 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1294 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1296 #undef TARGET_ASM_FILE_START
1297 #define TARGET_ASM_FILE_START x86_file_start
1299 #undef TARGET_DEFAULT_TARGET_FLAGS
1300 #define TARGET_DEFAULT_TARGET_FLAGS \
1301 (TARGET_DEFAULT \
1302 | TARGET_64BIT_DEFAULT \
1303 | TARGET_SUBTARGET_DEFAULT \
1304 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1306 #undef TARGET_HANDLE_OPTION
1307 #define TARGET_HANDLE_OPTION ix86_handle_option
1309 #undef TARGET_RTX_COSTS
1310 #define TARGET_RTX_COSTS ix86_rtx_costs
1311 #undef TARGET_ADDRESS_COST
1312 #define TARGET_ADDRESS_COST ix86_address_cost
1314 #undef TARGET_FIXED_CONDITION_CODE_REGS
1315 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1316 #undef TARGET_CC_MODES_COMPATIBLE
1317 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1319 #undef TARGET_MACHINE_DEPENDENT_REORG
1320 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1322 #undef TARGET_BUILD_BUILTIN_VA_LIST
1323 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1325 #undef TARGET_MD_ASM_CLOBBERS
1326 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1328 #undef TARGET_PROMOTE_PROTOTYPES
1329 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1330 #undef TARGET_STRUCT_VALUE_RTX
1331 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1332 #undef TARGET_SETUP_INCOMING_VARARGS
1333 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1334 #undef TARGET_MUST_PASS_IN_STACK
1335 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1336 #undef TARGET_PASS_BY_REFERENCE
1337 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1338 #undef TARGET_INTERNAL_ARG_POINTER
1339 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1340 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1341 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1343 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1344 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1346 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1347 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1349 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1350 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1352 #ifdef HAVE_AS_TLS
1353 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1354 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1355 #endif
1357 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1358 #undef TARGET_INSERT_ATTRIBUTES
1359 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1360 #endif
1362 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1363 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1365 #undef TARGET_STACK_PROTECT_FAIL
1366 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1368 #undef TARGET_FUNCTION_VALUE
1369 #define TARGET_FUNCTION_VALUE ix86_function_value
1371 struct gcc_target targetm = TARGET_INITIALIZER;
1374 /* The svr4 ABI for the i386 says that records and unions are returned
1375 in memory. */
1376 #ifndef DEFAULT_PCC_STRUCT_RETURN
1377 #define DEFAULT_PCC_STRUCT_RETURN 1
1378 #endif
1380 /* Implement TARGET_HANDLE_OPTION. */
1382 static bool
1383 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1385 switch (code)
1387 case OPT_m3dnow:
1388 if (!value)
1390 target_flags &= ~MASK_3DNOW_A;
1391 target_flags_explicit |= MASK_3DNOW_A;
1393 return true;
1395 case OPT_mmmx:
1396 if (!value)
1398 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1399 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1401 return true;
1403 case OPT_msse:
1404 if (!value)
1406 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1407 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1409 return true;
1411 case OPT_msse2:
1412 if (!value)
1414 target_flags &= ~MASK_SSE3;
1415 target_flags_explicit |= MASK_SSE3;
1417 return true;
1419 default:
1420 return true;
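/* Illustrative note (not part of the original source): disabling a base
   ISA option also clears the extensions that depend on it via the cases
   above.  For example, a hypothetical command line

       gcc -m32 -mno-sse foo.c

   clears MASK_SSE2 and MASK_SSE3 as well, and records them in
   target_flags_explicit so override_options will not turn them back on.  */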
1424 /* Sometimes certain combinations of command options do not make
1425 sense on a particular target machine. You can define a macro
1426 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1427 defined, is executed once just after all the command options have
1428 been parsed.
1430 Don't use this macro to turn on various extra optimizations for
1431 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1433 void
1434 override_options (void)
1436 int i;
1437 int ix86_tune_defaulted = 0;
1439 /* Comes from final.c -- no real reason to change it. */
1440 #define MAX_CODE_ALIGN 16
1442 static struct ptt
1444 const struct processor_costs *cost; /* Processor costs */
1445 const int target_enable; /* Target flags to enable. */
1446 const int target_disable; /* Target flags to disable. */
1447 const int align_loop; /* Default alignments. */
1448 const int align_loop_max_skip;
1449 const int align_jump;
1450 const int align_jump_max_skip;
1451 const int align_func;
1453 const processor_target_table[PROCESSOR_max] =
1455 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1456 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1457 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1459 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1460 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1461 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1462 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1463 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1464 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1465 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1468 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1469 static struct pta
1471 const char *const name; /* processor name or nickname. */
1472 const enum processor_type processor;
1473 const enum pta_flags
1475 PTA_SSE = 1,
1476 PTA_SSE2 = 2,
1477 PTA_SSE3 = 4,
1478 PTA_MMX = 8,
1479 PTA_PREFETCH_SSE = 16,
1480 PTA_3DNOW = 32,
1481 PTA_3DNOW_A = 64,
1482 PTA_64BIT = 128
1483 } flags;
1485 const processor_alias_table[] =
1487 {"i386", PROCESSOR_I386, 0},
1488 {"i486", PROCESSOR_I486, 0},
1489 {"i586", PROCESSOR_PENTIUM, 0},
1490 {"pentium", PROCESSOR_PENTIUM, 0},
1491 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1492 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1493 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1494 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1495 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1496 {"i686", PROCESSOR_PENTIUMPRO, 0},
1497 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1498 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1499 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1500 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1501 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1502 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1503 | PTA_MMX | PTA_PREFETCH_SSE},
1504 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1505 | PTA_MMX | PTA_PREFETCH_SSE},
1506 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1507 | PTA_MMX | PTA_PREFETCH_SSE},
1508 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1509 | PTA_MMX | PTA_PREFETCH_SSE},
1510 {"k6", PROCESSOR_K6, PTA_MMX},
1511 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1512 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1513 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1514 | PTA_3DNOW_A},
1515 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1516 | PTA_3DNOW | PTA_3DNOW_A},
1517 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1518 | PTA_3DNOW_A | PTA_SSE},
1519 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1520 | PTA_3DNOW_A | PTA_SSE},
1521 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1522 | PTA_3DNOW_A | PTA_SSE},
1523 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1524 | PTA_SSE | PTA_SSE2 },
1525 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1526 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1527 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1528 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1529 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1532 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1533 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1534 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1537 int const pta_size = ARRAY_SIZE (processor_alias_table);
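/* Worked example (illustrative, not in the original source): the PTA_*
   values form a bitmask, so the "k8" entry above encodes
   PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT | PTA_3DNOW_A
   | PTA_SSE | PTA_SSE2 = 8 + 16 + 32 + 128 + 64 + 1 + 2 = 251.
   The -march loop below translates these bits into MASK_MMX, MASK_3DNOW,
   MASK_3DNOW_A, MASK_SSE and MASK_SSE2 (plus x86_prefetch_sse), unless
   the user set the corresponding flag explicitly.  */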
1539 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1540 SUBTARGET_OVERRIDE_OPTIONS;
1541 #endif
1543 /* Set the default values for switches whose default depends on TARGET_64BIT
1544 in case they weren't overwritten by command line options. */
1545 if (TARGET_64BIT)
1547 if (flag_omit_frame_pointer == 2)
1548 flag_omit_frame_pointer = 1;
1549 if (flag_asynchronous_unwind_tables == 2)
1550 flag_asynchronous_unwind_tables = 1;
1551 if (flag_pcc_struct_return == 2)
1552 flag_pcc_struct_return = 0;
1554 else
1556 if (flag_omit_frame_pointer == 2)
1557 flag_omit_frame_pointer = 0;
1558 if (flag_asynchronous_unwind_tables == 2)
1559 flag_asynchronous_unwind_tables = 0;
1560 if (flag_pcc_struct_return == 2)
1561 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1564 /* Need to check -mtune=generic first. */
1565 if (ix86_tune_string)
1567 if (!strcmp (ix86_tune_string, "generic")
1568 || !strcmp (ix86_tune_string, "i686"))
1570 if (TARGET_64BIT)
1571 ix86_tune_string = "generic64";
1572 else
1573 ix86_tune_string = "generic32";
1575 else if (!strncmp (ix86_tune_string, "generic", 7))
1576 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1578 else
1580 if (ix86_arch_string)
1581 ix86_tune_string = ix86_arch_string;
1582 if (!ix86_tune_string)
1584 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1585 ix86_tune_defaulted = 1;
1588 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1589 need to use a sensible tune option. */
1590 if (!strcmp (ix86_tune_string, "generic")
1591 || !strcmp (ix86_tune_string, "x86-64")
1592 || !strcmp (ix86_tune_string, "i686"))
1594 if (TARGET_64BIT)
1595 ix86_tune_string = "generic64";
1596 else
1597 ix86_tune_string = "generic32";
1600 if (!strcmp (ix86_tune_string, "x86-64"))
1601 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1602 "-mtune=generic instead as appropriate.");
1604 if (!ix86_arch_string)
1605 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1606 if (!strcmp (ix86_arch_string, "generic"))
1607 error ("generic CPU can be used only for -mtune= switch");
1608 if (!strncmp (ix86_arch_string, "generic", 7))
1609 error ("bad value (%s) for -march= switch", ix86_arch_string);
1611 if (ix86_cmodel_string != 0)
1613 if (!strcmp (ix86_cmodel_string, "small"))
1614 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1615 else if (!strcmp (ix86_cmodel_string, "medium"))
1616 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1617 else if (flag_pic)
1618 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1619 else if (!strcmp (ix86_cmodel_string, "32"))
1620 ix86_cmodel = CM_32;
1621 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1622 ix86_cmodel = CM_KERNEL;
1623 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1624 ix86_cmodel = CM_LARGE;
1625 else
1626 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1628 else
1630 ix86_cmodel = CM_32;
1631 if (TARGET_64BIT)
1632 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1634 if (ix86_asm_string != 0)
1636 if (! TARGET_MACHO
1637 && !strcmp (ix86_asm_string, "intel"))
1638 ix86_asm_dialect = ASM_INTEL;
1639 else if (!strcmp (ix86_asm_string, "att"))
1640 ix86_asm_dialect = ASM_ATT;
1641 else
1642 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1644 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1645 error ("code model %qs not supported in the %s bit mode",
1646 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1647 if (ix86_cmodel == CM_LARGE)
1648 sorry ("code model %<large%> not supported yet");
1649 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1650 sorry ("%i-bit mode not compiled in",
1651 (target_flags & MASK_64BIT) ? 64 : 32);
1653 for (i = 0; i < pta_size; i++)
1654 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1656 ix86_arch = processor_alias_table[i].processor;
1657 /* Default cpu tuning to the architecture. */
1658 ix86_tune = ix86_arch;
1659 if (processor_alias_table[i].flags & PTA_MMX
1660 && !(target_flags_explicit & MASK_MMX))
1661 target_flags |= MASK_MMX;
1662 if (processor_alias_table[i].flags & PTA_3DNOW
1663 && !(target_flags_explicit & MASK_3DNOW))
1664 target_flags |= MASK_3DNOW;
1665 if (processor_alias_table[i].flags & PTA_3DNOW_A
1666 && !(target_flags_explicit & MASK_3DNOW_A))
1667 target_flags |= MASK_3DNOW_A;
1668 if (processor_alias_table[i].flags & PTA_SSE
1669 && !(target_flags_explicit & MASK_SSE))
1670 target_flags |= MASK_SSE;
1671 if (processor_alias_table[i].flags & PTA_SSE2
1672 && !(target_flags_explicit & MASK_SSE2))
1673 target_flags |= MASK_SSE2;
1674 if (processor_alias_table[i].flags & PTA_SSE3
1675 && !(target_flags_explicit & MASK_SSE3))
1676 target_flags |= MASK_SSE3;
1677 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1678 x86_prefetch_sse = true;
1679 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1680 error ("CPU you selected does not support x86-64 "
1681 "instruction set");
1682 break;
1685 if (i == pta_size)
1686 error ("bad value (%s) for -march= switch", ix86_arch_string);
1688 for (i = 0; i < pta_size; i++)
1689 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1691 ix86_tune = processor_alias_table[i].processor;
1692 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1694 if (ix86_tune_defaulted)
1696 ix86_tune_string = "x86-64";
1697 for (i = 0; i < pta_size; i++)
1698 if (! strcmp (ix86_tune_string,
1699 processor_alias_table[i].name))
1700 break;
1701 ix86_tune = processor_alias_table[i].processor;
1703 else
1704 error ("CPU you selected does not support x86-64 "
1705 "instruction set");
1707 /* Intel CPUs have always interpreted SSE prefetch instructions as
1708 NOPs; so, we can enable SSE prefetch instructions even when
1709 -mtune (rather than -march) points us to a processor that has them.
1710 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1711 higher processors. */
1712 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1713 x86_prefetch_sse = true;
1714 break;
1716 if (i == pta_size)
1717 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1719 if (optimize_size)
1720 ix86_cost = &size_cost;
1721 else
1722 ix86_cost = processor_target_table[ix86_tune].cost;
1723 target_flags |= processor_target_table[ix86_tune].target_enable;
1724 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1726 /* Arrange to set up i386_stack_locals for all functions. */
1727 init_machine_status = ix86_init_machine_status;
1729 /* Validate -mregparm= value. */
1730 if (ix86_regparm_string)
1732 i = atoi (ix86_regparm_string);
1733 if (i < 0 || i > REGPARM_MAX)
1734 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1735 else
1736 ix86_regparm = i;
1738 else
1739 if (TARGET_64BIT)
1740 ix86_regparm = REGPARM_MAX;
1742 /* If the user has provided any of the -malign-* options,
1743 warn and use that value only if -falign-* is not set.
1744 Remove this code in GCC 3.2 or later. */
1745 if (ix86_align_loops_string)
1747 warning (0, "-malign-loops is obsolete, use -falign-loops");
1748 if (align_loops == 0)
1750 i = atoi (ix86_align_loops_string);
1751 if (i < 0 || i > MAX_CODE_ALIGN)
1752 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1753 else
1754 align_loops = 1 << i;
1758 if (ix86_align_jumps_string)
1760 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1761 if (align_jumps == 0)
1763 i = atoi (ix86_align_jumps_string);
1764 if (i < 0 || i > MAX_CODE_ALIGN)
1765 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1766 else
1767 align_jumps = 1 << i;
1771 if (ix86_align_funcs_string)
1773 warning (0, "-malign-functions is obsolete, use -falign-functions");
1774 if (align_functions == 0)
1776 i = atoi (ix86_align_funcs_string);
1777 if (i < 0 || i > MAX_CODE_ALIGN)
1778 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1779 else
1780 align_functions = 1 << i;
1784 /* Default align_* from the processor table. */
1785 if (align_loops == 0)
1787 align_loops = processor_target_table[ix86_tune].align_loop;
1788 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1790 if (align_jumps == 0)
1792 align_jumps = processor_target_table[ix86_tune].align_jump;
1793 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1795 if (align_functions == 0)
1797 align_functions = processor_target_table[ix86_tune].align_func;
1800 /* Validate -mpreferred-stack-boundary= value, or provide default.
1801 The default of 128 bits is for Pentium III's SSE __m128, but we
1802 don't want additional code to keep the stack aligned when
1803 optimizing for code size. */
1804 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1805 ? 128 : 32);
1806 if (ix86_preferred_stack_boundary_string)
1808 i = atoi (ix86_preferred_stack_boundary_string);
1809 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1810 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1811 TARGET_64BIT ? 4 : 2);
1812 else
1813 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
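/* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
   stack.  The smallest accepted value is 4 (16 bytes) in 64-bit mode and
   2 (4 bytes) in 32-bit mode; the largest is 12 (4096 bytes).  */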
1816 /* Validate -mbranch-cost= value, or provide default. */
1817 ix86_branch_cost = ix86_cost->branch_cost;
1818 if (ix86_branch_cost_string)
1820 i = atoi (ix86_branch_cost_string);
1821 if (i < 0 || i > 5)
1822 error ("-mbranch-cost=%d is not between 0 and 5", i);
1823 else
1824 ix86_branch_cost = i;
1826 if (ix86_section_threshold_string)
1828 i = atoi (ix86_section_threshold_string);
1829 if (i < 0)
1830 error ("-mlarge-data-threshold=%d is negative", i);
1831 else
1832 ix86_section_threshold = i;
1835 if (ix86_tls_dialect_string)
1837 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1838 ix86_tls_dialect = TLS_DIALECT_GNU;
1839 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1840 ix86_tls_dialect = TLS_DIALECT_GNU2;
1841 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1842 ix86_tls_dialect = TLS_DIALECT_SUN;
1843 else
1844 error ("bad value (%s) for -mtls-dialect= switch",
1845 ix86_tls_dialect_string);
1848 /* Keep nonleaf frame pointers. */
1849 if (flag_omit_frame_pointer)
1850 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1851 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1852 flag_omit_frame_pointer = 1;
1854 /* If we're doing fast math, we don't care about comparison order
1855 wrt NaNs. This lets us use a shorter comparison sequence. */
1856 if (flag_unsafe_math_optimizations)
1857 target_flags &= ~MASK_IEEE_FP;
1859 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1860 since the insns won't need emulation. */
1861 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1862 target_flags &= ~MASK_NO_FANCY_MATH_387;
1864 /* Likewise, if the target doesn't have a 387, or we've specified
1865 software floating point, don't use 387 inline intrinsics. */
1866 if (!TARGET_80387)
1867 target_flags |= MASK_NO_FANCY_MATH_387;
1869 /* Turn on SSE2 builtins for -msse3. */
1870 if (TARGET_SSE3)
1871 target_flags |= MASK_SSE2;
1873 /* Turn on SSE builtins for -msse2. */
1874 if (TARGET_SSE2)
1875 target_flags |= MASK_SSE;
1877 /* Turn on MMX builtins for -msse. */
1878 if (TARGET_SSE)
1880 target_flags |= MASK_MMX & ~target_flags_explicit;
1881 x86_prefetch_sse = true;
1884 /* Turn on MMX builtins for 3Dnow. */
1885 if (TARGET_3DNOW)
1886 target_flags |= MASK_MMX;
1888 if (TARGET_64BIT)
1890 if (TARGET_ALIGN_DOUBLE)
1891 error ("-malign-double makes no sense in the 64bit mode");
1892 if (TARGET_RTD)
1893 error ("-mrtd calling convention not supported in the 64bit mode");
1895 /* Enable by default the SSE and MMX builtins. Do allow the user to
1896 explicitly disable any of these. In particular, disabling SSE and
1897 MMX for kernel code is extremely useful. */
1898 target_flags
1899 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1900 & ~target_flags_explicit);
1902 else
1904 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1905 when the programmer takes care to keep the stack from being destroyed. */
1906 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1907 target_flags |= MASK_NO_RED_ZONE;
1910 /* Accept -msseregparm only if at least SSE support is enabled. */
1911 if (TARGET_SSEREGPARM
1912 && ! TARGET_SSE)
1913 error ("-msseregparm used without SSE enabled");
1915 /* Accept -msselibm only if at least SSE support is enabled. */
1916 if (TARGET_SSELIBM
1917 && ! TARGET_SSE2)
1918 error ("-msselibm used without SSE2 enabled");
1920 /* Ignore -msselibm on 64bit targets. */
1921 if (TARGET_SSELIBM
1922 && TARGET_64BIT)
1923 error ("-msselibm used on a 64bit target");
1925 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1927 if (ix86_fpmath_string != 0)
1929 if (! strcmp (ix86_fpmath_string, "387"))
1930 ix86_fpmath = FPMATH_387;
1931 else if (! strcmp (ix86_fpmath_string, "sse"))
1933 if (!TARGET_SSE)
1935 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1936 ix86_fpmath = FPMATH_387;
1938 else
1939 ix86_fpmath = FPMATH_SSE;
1941 else if (! strcmp (ix86_fpmath_string, "387,sse")
1942 || ! strcmp (ix86_fpmath_string, "sse,387"))
1944 if (!TARGET_SSE)
1946 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1947 ix86_fpmath = FPMATH_387;
1949 else if (!TARGET_80387)
1951 warning (0, "387 instruction set disabled, using SSE arithmetics");
1952 ix86_fpmath = FPMATH_SSE;
1954 else
1955 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1957 else
1958 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1961 /* If the i387 is disabled, then do not return values in it. */
1962 if (!TARGET_80387)
1963 target_flags &= ~MASK_FLOAT_RETURNS;
1965 if ((x86_accumulate_outgoing_args & TUNEMASK)
1966 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1967 && !optimize_size)
1968 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1970 /* ??? Unwind info is not correct around the CFG unless either a frame
1971 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1972 unwind info generation to be aware of the CFG and propagating states
1973 around edges. */
1974 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1975 || flag_exceptions || flag_non_call_exceptions)
1976 && flag_omit_frame_pointer
1977 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1979 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1980 warning (0, "unwind tables currently require either a frame pointer "
1981 "or -maccumulate-outgoing-args for correctness");
1982 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1985 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1987 char *p;
1988 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1989 p = strchr (internal_label_prefix, 'X');
1990 internal_label_prefix_len = p - internal_label_prefix;
1991 *p = '\0';
1994 /* When no scheduling description is available, disable the scheduler pass
1995 so it won't slow down the compilation and make x87 code slower. */
1996 if (!TARGET_SCHEDULE)
1997 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2000 /* switch to the appropriate section for output of DECL.
2001 DECL is either a `VAR_DECL' node or a constant of some sort.
2002 RELOC indicates whether forming the initial value of DECL requires
2003 link-time relocations. */
2005 static section *
2006 x86_64_elf_select_section (tree decl, int reloc,
2007 unsigned HOST_WIDE_INT align)
2009 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2010 && ix86_in_large_data_p (decl))
2012 const char *sname = NULL;
2013 unsigned int flags = SECTION_WRITE;
2014 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2016 case SECCAT_DATA:
2017 sname = ".ldata";
2018 break;
2019 case SECCAT_DATA_REL:
2020 sname = ".ldata.rel";
2021 break;
2022 case SECCAT_DATA_REL_LOCAL:
2023 sname = ".ldata.rel.local";
2024 break;
2025 case SECCAT_DATA_REL_RO:
2026 sname = ".ldata.rel.ro";
2027 break;
2028 case SECCAT_DATA_REL_RO_LOCAL:
2029 sname = ".ldata.rel.ro.local";
2030 break;
2031 case SECCAT_BSS:
2032 sname = ".lbss";
2033 flags |= SECTION_BSS;
2034 break;
2035 case SECCAT_RODATA:
2036 case SECCAT_RODATA_MERGE_STR:
2037 case SECCAT_RODATA_MERGE_STR_INIT:
2038 case SECCAT_RODATA_MERGE_CONST:
2039 sname = ".lrodata";
2040 flags = 0;
2041 break;
2042 case SECCAT_SRODATA:
2043 case SECCAT_SDATA:
2044 case SECCAT_SBSS:
2045 gcc_unreachable ();
2046 case SECCAT_TEXT:
2047 case SECCAT_TDATA:
2048 case SECCAT_TBSS:
2049 /* We don't split these for the medium model. Place them into
2050 default sections and hope for the best. */
2051 break;
2053 if (sname)
2055 /* We might get called with string constants, but get_named_section
2056 doesn't like them as they are not DECLs. Also, we need to set
2057 flags in that case. */
2058 if (!DECL_P (decl))
2059 return get_section (sname, flags, NULL);
2060 return get_named_section (decl, sname, reloc);
2063 return default_elf_select_section (decl, reloc, align);
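/* Illustrative example (not part of the original source; the variable
   name is hypothetical): with -mcmodel=medium, a writable object whose
   size exceeds -mlarge-data-threshold, e.g.

       static char big_table[1 << 20] = { 1 };

   is placed in .ldata (or one of the .ldata.rel* variants when relocations
   are involved) by the switch above, while small objects keep using the
   default .data/.rodata/.bss sections.  */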
2066 /* Build up a unique section name, expressed as a
2067 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2068 RELOC indicates whether the initial value of EXP requires
2069 link-time relocations. */
2071 static void
2072 x86_64_elf_unique_section (tree decl, int reloc)
2074 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2075 && ix86_in_large_data_p (decl))
2077 const char *prefix = NULL;
2078 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2079 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2081 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2083 case SECCAT_DATA:
2084 case SECCAT_DATA_REL:
2085 case SECCAT_DATA_REL_LOCAL:
2086 case SECCAT_DATA_REL_RO:
2087 case SECCAT_DATA_REL_RO_LOCAL:
2088 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2089 break;
2090 case SECCAT_BSS:
2091 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2092 break;
2093 case SECCAT_RODATA:
2094 case SECCAT_RODATA_MERGE_STR:
2095 case SECCAT_RODATA_MERGE_STR_INIT:
2096 case SECCAT_RODATA_MERGE_CONST:
2097 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2098 break;
2099 case SECCAT_SRODATA:
2100 case SECCAT_SDATA:
2101 case SECCAT_SBSS:
2102 gcc_unreachable ();
2103 case SECCAT_TEXT:
2104 case SECCAT_TDATA:
2105 case SECCAT_TBSS:
2106 /* We don't split these for the medium model. Place them into
2107 default sections and hope for the best. */
2108 break;
2110 if (prefix)
2112 const char *name;
2113 size_t nlen, plen;
2114 char *string;
2115 plen = strlen (prefix);
2117 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2118 name = targetm.strip_name_encoding (name);
2119 nlen = strlen (name);
2121 string = alloca (nlen + plen + 1);
2122 memcpy (string, prefix, plen);
2123 memcpy (string + plen, name, nlen + 1);
2125 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2126 return;
2129 default_unique_section (decl, reloc);
2132 #ifdef COMMON_ASM_OP
2133 /* This says how to output assembler code to declare an
2134 uninitialized external linkage data object.
2136 For medium model x86-64 we need to use the .largecomm directive for
2137 large objects. */
2138 void
2139 x86_elf_aligned_common (FILE *file,
2140 const char *name, unsigned HOST_WIDE_INT size,
2141 int align)
2143 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2144 && size > (unsigned int)ix86_section_threshold)
2145 fprintf (file, ".largecomm\t");
2146 else
2147 fprintf (file, "%s", COMMON_ASM_OP);
2148 assemble_name (file, name);
2149 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2150 size, align / BITS_PER_UNIT);
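/* Illustrative example (assembler output sketch; the symbol name is
   hypothetical): for a 1 MB common symbol with 32-byte alignment under
   -mcmodel=medium this emits

       .largecomm	big_buf,1048576,32

   whereas a small symbol uses the usual COMMON_ASM_OP form, e.g.
   ".comm	big_buf,64,8".  */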
2153 /* Utility function for targets to use in implementing
2154 ASM_OUTPUT_ALIGNED_BSS. */
2156 void
2157 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2158 const char *name, unsigned HOST_WIDE_INT size,
2159 int align)
2161 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2162 && size > (unsigned int)ix86_section_threshold)
2163 switch_to_section (get_named_section (decl, ".lbss", 0));
2164 else
2165 switch_to_section (bss_section);
2166 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2167 #ifdef ASM_DECLARE_OBJECT_NAME
2168 last_assemble_variable_decl = decl;
2169 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2170 #else
2171 /* The standard thing is just to output a label for the object. */
2172 ASM_OUTPUT_LABEL (file, name);
2173 #endif /* ASM_DECLARE_OBJECT_NAME */
2174 ASM_OUTPUT_SKIP (file, size ? size : 1);
2176 #endif
2178 void
2179 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2181 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2182 make the problem with not enough registers even worse. */
2183 #ifdef INSN_SCHEDULING
2184 if (level > 1)
2185 flag_schedule_insns = 0;
2186 #endif
2188 if (TARGET_MACHO)
2189 /* The Darwin libraries never set errno, so we might as well
2190 avoid calling them when that's the only reason we would. */
2191 flag_errno_math = 0;
2193 /* The default values of these switches depend on TARGET_64BIT, which
2194 is not known at this moment. Mark these values with 2 and let the
2195 user override them. In case there is no command line option
2196 specifying them, we will set the defaults in override_options. */
2197 if (optimize >= 1)
2198 flag_omit_frame_pointer = 2;
2199 flag_pcc_struct_return = 2;
2200 flag_asynchronous_unwind_tables = 2;
2201 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2202 SUBTARGET_OPTIMIZATION_OPTIONS;
2203 #endif
2206 /* Table of valid machine attributes. */
2207 const struct attribute_spec ix86_attribute_table[] =
2209 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2210 /* Stdcall attribute says callee is responsible for popping arguments
2211 if they are not variable. */
2212 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2213 /* Fastcall attribute says callee is responsible for popping arguments
2214 if they are not variable. */
2215 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2216 /* Cdecl attribute says the callee is a normal C declaration */
2217 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2218 /* Regparm attribute specifies how many integer arguments are to be
2219 passed in registers. */
2220 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2221 /* Sseregparm attribute says we are using x86_64 calling conventions
2222 for FP arguments. */
2223 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2224 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2225 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2226 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2227 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2228 #endif
2229 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2230 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2231 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2232 SUBTARGET_ATTRIBUTE_TABLE,
2233 #endif
2234 { NULL, 0, 0, false, false, false, NULL }
2237 /* Decide whether we can make a sibling call to a function. DECL is the
2238 declaration of the function being targeted by the call and EXP is the
2239 CALL_EXPR representing the call. */
2241 static bool
2242 ix86_function_ok_for_sibcall (tree decl, tree exp)
2244 tree func;
2245 rtx a, b;
2247 /* If we are generating position-independent code, we cannot sibcall
2248 optimize any indirect call, or a direct call to a global function,
2249 as the PLT requires %ebx be live. */
2250 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2251 return false;
2253 if (decl)
2254 func = decl;
2255 else
2257 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2258 if (POINTER_TYPE_P (func))
2259 func = TREE_TYPE (func);
2262 /* Check that the return value locations are the same. Like
2263 if we are returning floats on the 80387 register stack, we cannot
2264 make a sibcall from a function that doesn't return a float to a
2265 function that does or, conversely, from a function that does return
2266 a float to a function that doesn't; the necessary stack adjustment
2267 would not be executed. This is also the place we notice
2268 differences in the return value ABI. Note that it is ok for one
2269 of the functions to have void return type as long as the return
2270 value of the other is passed in a register. */
2271 a = ix86_function_value (TREE_TYPE (exp), func, false);
2272 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2273 cfun->decl, false);
2274 if (STACK_REG_P (a) || STACK_REG_P (b))
2276 if (!rtx_equal_p (a, b))
2277 return false;
2279 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2281 else if (!rtx_equal_p (a, b))
2282 return false;
2284 /* If this call is indirect, we'll need to be able to use a call-clobbered
2285 register for the address of the target function. Make sure that all
2286 such registers are not used for passing parameters. */
2287 if (!decl && !TARGET_64BIT)
2289 tree type;
2291 /* We're looking at the CALL_EXPR, we need the type of the function. */
2292 type = TREE_OPERAND (exp, 0); /* pointer expression */
2293 type = TREE_TYPE (type); /* pointer type */
2294 type = TREE_TYPE (type); /* function type */
2296 if (ix86_function_regparm (type, NULL) >= 3)
2298 /* ??? Need to count the actual number of registers to be used,
2299 not the possible number of registers. Fix later. */
2300 return false;
2304 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2305 /* Dllimport'd functions are also called indirectly. */
2306 if (decl && DECL_DLLIMPORT_P (decl)
2307 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2308 return false;
2309 #endif
2311 /* If we force-aligned the stack, then sibcalling would unalign the
2312 stack, which may break the called function. */
2313 if (cfun->machine->force_align_arg_pointer)
2314 return false;
2316 /* Otherwise okay. That also includes certain types of indirect calls. */
2317 return true;
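/* Illustrative example (function names hypothetical): when compiling

       extern int callee (int);
       int caller (int x) { return callee (x); }

   with -m32 -fPIC, the call above is *not* turned into a sibcall, because
   callee is global and the PLT call sequence needs %ebx to hold the GOT
   pointer (the first check in this function).  Without -fPIC, or when
   callee binds locally, the tail call is allowed.  */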
2320 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2321 calling convention attributes;
2322 arguments as in struct attribute_spec.handler. */
2324 static tree
2325 ix86_handle_cconv_attribute (tree *node, tree name,
2326 tree args,
2327 int flags ATTRIBUTE_UNUSED,
2328 bool *no_add_attrs)
2330 if (TREE_CODE (*node) != FUNCTION_TYPE
2331 && TREE_CODE (*node) != METHOD_TYPE
2332 && TREE_CODE (*node) != FIELD_DECL
2333 && TREE_CODE (*node) != TYPE_DECL)
2335 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2336 IDENTIFIER_POINTER (name));
2337 *no_add_attrs = true;
2338 return NULL_TREE;
2341 /* Can combine regparm with all attributes but fastcall. */
2342 if (is_attribute_p ("regparm", name))
2344 tree cst;
2346 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2348 error ("fastcall and regparm attributes are not compatible");
2351 cst = TREE_VALUE (args);
2352 if (TREE_CODE (cst) != INTEGER_CST)
2354 warning (OPT_Wattributes,
2355 "%qs attribute requires an integer constant argument",
2356 IDENTIFIER_POINTER (name));
2357 *no_add_attrs = true;
2359 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2361 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2362 IDENTIFIER_POINTER (name), REGPARM_MAX);
2363 *no_add_attrs = true;
2366 return NULL_TREE;
2369 if (TARGET_64BIT)
2371 warning (OPT_Wattributes, "%qs attribute ignored",
2372 IDENTIFIER_POINTER (name));
2373 *no_add_attrs = true;
2374 return NULL_TREE;
2377 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2378 if (is_attribute_p ("fastcall", name))
2380 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2382 error ("fastcall and cdecl attributes are not compatible");
2384 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2386 error ("fastcall and stdcall attributes are not compatible");
2388 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2390 error ("fastcall and regparm attributes are not compatible");
2394 /* Can combine stdcall with fastcall (redundant), regparm and
2395 sseregparm. */
2396 else if (is_attribute_p ("stdcall", name))
2398 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2400 error ("stdcall and cdecl attributes are not compatible");
2402 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2404 error ("stdcall and fastcall attributes are not compatible");
2408 /* Can combine cdecl with regparm and sseregparm. */
2409 else if (is_attribute_p ("cdecl", name))
2411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2413 error ("stdcall and cdecl attributes are not compatible");
2415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2417 error ("fastcall and cdecl attributes are not compatible");
2421 /* Can combine sseregparm with all attributes. */
2423 return NULL_TREE;
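/* Illustrative example (the declaration is hypothetical): the checks above
   reject conflicting calling-convention attributes, so

       void __attribute__ ((fastcall, regparm (2))) f (int);

   is diagnosed with "fastcall and regparm attributes are not compatible",
   while combining sseregparm with any of the others is accepted.  */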
2426 /* Return 0 if the attributes for two types are incompatible, 1 if they
2427 are compatible, and 2 if they are nearly compatible (which causes a
2428 warning to be generated). */
2430 static int
2431 ix86_comp_type_attributes (tree type1, tree type2)
2433 /* Check for mismatch of non-default calling convention. */
2434 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2436 if (TREE_CODE (type1) != FUNCTION_TYPE)
2437 return 1;
2439 /* Check for mismatched fastcall/regparm types. */
2440 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2441 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2442 || (ix86_function_regparm (type1, NULL)
2443 != ix86_function_regparm (type2, NULL)))
2444 return 0;
2446 /* Check for mismatched sseregparm types. */
2447 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2448 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2449 return 0;
2451 /* Check for mismatched return types (cdecl vs stdcall). */
2452 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2453 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2454 return 0;
2456 return 1;
2459 /* Return the regparm value for a function with the indicated TYPE and DECL.
2460 DECL may be NULL when calling function indirectly
2461 or considering a libcall. */
2463 static int
2464 ix86_function_regparm (tree type, tree decl)
2466 tree attr;
2467 int regparm = ix86_regparm;
2468 bool user_convention = false;
2470 if (!TARGET_64BIT)
2472 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2473 if (attr)
2475 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2476 user_convention = true;
2479 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2481 regparm = 2;
2482 user_convention = true;
2485 /* Use register calling convention for local functions when possible. */
2486 if (!TARGET_64BIT && !user_convention && decl
2487 && flag_unit_at_a_time && !profile_flag)
2489 struct cgraph_local_info *i = cgraph_local_info (decl);
2490 if (i && i->local)
2492 int local_regparm, globals = 0, regno;
2494 /* Make sure no regparm register is taken by a global register
2495 variable. */
2496 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2497 if (global_regs[local_regparm])
2498 break;
2499 /* We can't use regparm(3) for nested functions as these use
2500 the static chain pointer in the third argument. */
2501 if (local_regparm == 3
2502 && decl_function_context (decl)
2503 && !DECL_NO_STATIC_CHAIN (decl))
2504 local_regparm = 2;
2505 /* Each global register variable increases register pressure,
2506 so the more global reg vars there are, the less useful the
2507 regparm optimization is, unless requested by the user explicitly. */
2508 for (regno = 0; regno < 6; regno++)
2509 if (global_regs[regno])
2510 globals++;
2511 local_regparm
2512 = globals < local_regparm ? local_regparm - globals : 0;
2514 if (local_regparm > regparm)
2515 regparm = local_regparm;
2519 return regparm;
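/* Illustrative example (function name hypothetical): with -m32,

       int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   makes this function return 3, so a, b and c are passed in %eax, %edx
   and %ecx.  For a static function local to the compilation unit (with
   -funit-at-a-time and no profiling), the block above may choose a
   nonzero regparm value automatically, reduced by the number of global
   register variables in use.  */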
2522 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2523 in SSE registers for a function with the indicated TYPE and DECL.
2524 DECL may be NULL when calling function indirectly
2525 or considering a libcall. Otherwise return 0. */
2527 static int
2528 ix86_function_sseregparm (tree type, tree decl)
2530 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2531 by the sseregparm attribute. */
2532 if (TARGET_SSEREGPARM
2533 || (type
2534 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2536 if (!TARGET_SSE)
2538 if (decl)
2539 error ("Calling %qD with attribute sseregparm without "
2540 "SSE/SSE2 enabled", decl);
2541 else
2542 error ("Calling %qT with attribute sseregparm without "
2543 "SSE/SSE2 enabled", type);
2544 return 0;
2547 return 2;
2550 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2551 in SSE registers even for 32-bit mode and not just 3, but up to
2552 8 SSE arguments in registers. */
2553 if (!TARGET_64BIT && decl
2554 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2556 struct cgraph_local_info *i = cgraph_local_info (decl);
2557 if (i && i->local)
2558 return TARGET_SSE2 ? 2 : 1;
2561 return 0;
2564 /* Return true if EAX is live at the start of the function. Used by
2565 ix86_expand_prologue to determine if we need special help before
2566 calling allocate_stack_worker. */
2568 static bool
2569 ix86_eax_live_at_start_p (void)
2571 /* Cheat. Don't bother working forward from ix86_function_regparm
2572 to the function type to whether an actual argument is located in
2573 eax. Instead just look at cfg info, which is still close enough
2574 to correct at this point. This gives false positives for broken
2575 functions that might use uninitialized data that happens to be
2576 allocated in eax, but who cares? */
2577 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2580 /* Value is the number of bytes of arguments automatically
2581 popped when returning from a subroutine call.
2582 FUNDECL is the declaration node of the function (as a tree),
2583 FUNTYPE is the data type of the function (as a tree),
2584 or for a library call it is an identifier node for the subroutine name.
2585 SIZE is the number of bytes of arguments passed on the stack.
2587 On the 80386, the RTD insn may be used to pop them if the number
2588 of args is fixed, but if the number is variable then the caller
2589 must pop them all. RTD can't be used for library calls now
2590 because the library is compiled with the Unix compiler.
2591 Use of RTD is a selectable option, since it is incompatible with
2592 standard Unix calling sequences. If the option is not selected,
2593 the caller must always pop the args.
2595 The attribute stdcall is equivalent to RTD on a per module basis. */
2598 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2600 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2602 /* Cdecl functions override -mrtd, and never pop the stack. */
2603 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2605 /* Stdcall and fastcall functions will pop the stack if not
2606 variable args. */
2607 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2608 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2609 rtd = 1;
2611 if (rtd
2612 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2613 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2614 == void_type_node)))
2615 return size;
2618 /* Lose any fake structure return argument if it is passed on the stack. */
2619 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2620 && !TARGET_64BIT
2621 && !KEEP_AGGREGATE_RETURN_POINTER)
2623 int nregs = ix86_function_regparm (funtype, fundecl);
2625 if (!nregs)
2626 return GET_MODE_SIZE (Pmode);
2629 return 0;
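/* Illustrative example (declaration hypothetical): for

       int __attribute__ ((stdcall)) f (int a, int b);

   this function returns 8, so f pops its own arguments (e.g. "ret $8"),
   whereas a variadic or plain cdecl function returns 0 and the caller
   pops the stack.  The aggregate-return case above additionally pops the
   hidden return-pointer word when it was pushed on the stack.  */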
2632 /* Argument support functions. */
2634 /* Return true when register may be used to pass function parameters. */
2635 bool
2636 ix86_function_arg_regno_p (int regno)
2638 int i;
2639 if (!TARGET_64BIT)
2640 return (regno < REGPARM_MAX
2641 || (TARGET_MMX && MMX_REGNO_P (regno)
2642 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2643 || (TARGET_SSE && SSE_REGNO_P (regno)
2644 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2646 if (TARGET_SSE && SSE_REGNO_P (regno)
2647 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2648 return true;
2649 /* RAX is used as hidden argument to va_arg functions. */
2650 if (!regno)
2651 return true;
2652 for (i = 0; i < REGPARM_MAX; i++)
2653 if (regno == x86_64_int_parameter_registers[i])
2654 return true;
2655 return false;
2658 /* Return true if we do not know how to pass TYPE solely in registers. */
2660 static bool
2661 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2663 if (must_pass_in_stack_var_size_or_pad (mode, type))
2664 return true;
2666 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2667 The layout_type routine is crafty and tries to trick us into passing
2668 currently unsupported vector types on the stack by using TImode. */
2669 return (!TARGET_64BIT && mode == TImode
2670 && type && TREE_CODE (type) != VECTOR_TYPE);
2673 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2674 for a call to a function whose data type is FNTYPE.
2675 For a library call, FNTYPE is 0. */
2677 void
2678 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2679 tree fntype, /* tree ptr for function decl */
2680 rtx libname, /* SYMBOL_REF of library name or 0 */
2681 tree fndecl)
2683 static CUMULATIVE_ARGS zero_cum;
2684 tree param, next_param;
2686 if (TARGET_DEBUG_ARG)
2688 fprintf (stderr, "\ninit_cumulative_args (");
2689 if (fntype)
2690 fprintf (stderr, "fntype code = %s, ret code = %s",
2691 tree_code_name[(int) TREE_CODE (fntype)],
2692 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2693 else
2694 fprintf (stderr, "no fntype");
2696 if (libname)
2697 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2700 *cum = zero_cum;
2702 /* Set up the number of registers to use for passing arguments. */
2703 cum->nregs = ix86_regparm;
2704 if (TARGET_SSE)
2705 cum->sse_nregs = SSE_REGPARM_MAX;
2706 if (TARGET_MMX)
2707 cum->mmx_nregs = MMX_REGPARM_MAX;
2708 cum->warn_sse = true;
2709 cum->warn_mmx = true;
2710 cum->maybe_vaarg = false;
2712 /* Use ecx and edx registers if function has fastcall attribute,
2713 else look for regparm information. */
2714 if (fntype && !TARGET_64BIT)
2716 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2718 cum->nregs = 2;
2719 cum->fastcall = 1;
2721 else
2722 cum->nregs = ix86_function_regparm (fntype, fndecl);
2725 /* Set up the number of SSE registers used for passing SFmode
2726 and DFmode arguments. Warn for mismatching ABI. */
2727 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2729 /* Determine if this function has variable arguments. This is
2730 indicated by the last argument being 'void_type_node' if there
2731 are no variable arguments. If there are variable arguments, then
2732 we won't pass anything in registers in 32-bit mode. */
2734 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2736 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2737 param != 0; param = next_param)
2739 next_param = TREE_CHAIN (param);
2740 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2742 if (!TARGET_64BIT)
2744 cum->nregs = 0;
2745 cum->sse_nregs = 0;
2746 cum->mmx_nregs = 0;
2747 cum->warn_sse = 0;
2748 cum->warn_mmx = 0;
2749 cum->fastcall = 0;
2750 cum->float_in_sse = 0;
2752 cum->maybe_vaarg = true;
2756 if ((!fntype && !libname)
2757 || (fntype && !TYPE_ARG_TYPES (fntype)))
2758 cum->maybe_vaarg = true;
2760 if (TARGET_DEBUG_ARG)
2761 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2763 return;
2766 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2767 But in the case of vector types, it is some vector mode.
2769 When we have only some of our vector isa extensions enabled, then there
2770 are some modes for which vector_mode_supported_p is false. For these
2771 modes, the generic vector support in gcc will choose some non-vector mode
2772 in order to implement the type. By computing the natural mode, we'll
2773 select the proper ABI location for the operand and not depend on whatever
2774 the middle-end decides to do with these vector types. */
2776 static enum machine_mode
2777 type_natural_mode (tree type)
2779 enum machine_mode mode = TYPE_MODE (type);
2781 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2783 HOST_WIDE_INT size = int_size_in_bytes (type);
2784 if ((size == 8 || size == 16)
2785 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2786 && TYPE_VECTOR_SUBPARTS (type) > 1)
2788 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2790 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2791 mode = MIN_MODE_VECTOR_FLOAT;
2792 else
2793 mode = MIN_MODE_VECTOR_INT;
2795 /* Get the mode which has this inner mode and number of units. */
2796 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2797 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2798 && GET_MODE_INNER (mode) == innermode)
2799 return mode;
2801 gcc_unreachable ();
2805 return mode;
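/* Illustrative example (the typedef is hypothetical): for

       typedef int v4si __attribute__ ((vector_size (16)));

   compiled without -msse, V4SImode is not "supported", so the middle end
   may give the type a non-vector TYPE_MODE; this function still returns
   V4SImode, so the argument is assigned its proper ABI location
   independently of which ISA extensions happen to be enabled.  */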
2808 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2809 this may not agree with the mode that the type system has chosen for the
2810 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2811 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2813 static rtx
2814 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2815 unsigned int regno)
2817 rtx tmp;
2819 if (orig_mode != BLKmode)
2820 tmp = gen_rtx_REG (orig_mode, regno);
2821 else
2823 tmp = gen_rtx_REG (mode, regno);
2824 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2825 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2828 return tmp;
2831 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2832 goal of this code is to classify each 8-byte chunk of the incoming argument
2833 by register class and assign registers accordingly. */
2835 /* Return the union class of CLASS1 and CLASS2.
2836 See the x86-64 PS ABI for details. */
2838 static enum x86_64_reg_class
2839 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2841 /* Rule #1: If both classes are equal, this is the resulting class. */
2842 if (class1 == class2)
2843 return class1;
2845 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2846 the other class. */
2847 if (class1 == X86_64_NO_CLASS)
2848 return class2;
2849 if (class2 == X86_64_NO_CLASS)
2850 return class1;
2852 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2853 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2854 return X86_64_MEMORY_CLASS;
2856 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2857 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2858 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2859 return X86_64_INTEGERSI_CLASS;
2860 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2861 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2862 return X86_64_INTEGER_CLASS;
2864 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2865 MEMORY is used. */
2866 if (class1 == X86_64_X87_CLASS
2867 || class1 == X86_64_X87UP_CLASS
2868 || class1 == X86_64_COMPLEX_X87_CLASS
2869 || class2 == X86_64_X87_CLASS
2870 || class2 == X86_64_X87UP_CLASS
2871 || class2 == X86_64_COMPLEX_X87_CLASS)
2872 return X86_64_MEMORY_CLASS;
2874 /* Rule #6: Otherwise class SSE is used. */
2875 return X86_64_SSE_CLASS;
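/* Worked examples (illustrative): merge_classes (X86_64_INTEGER_CLASS,
   X86_64_SSE_CLASS) is X86_64_INTEGER_CLASS by rule #4;
   merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS) is
   X86_64_SSESF_CLASS by rule #2; and combining X86_64_X87_CLASS with
   X86_64_SSE_CLASS collapses to X86_64_MEMORY_CLASS by rule #5.  */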
2878 /* Classify the argument of type TYPE and mode MODE.
2879 CLASSES will be filled by the register class used to pass each word
2880 of the operand. The number of words is returned. In case the parameter
2881 should be passed in memory, 0 is returned. As a special case for zero
2882 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2884 BIT_OFFSET is used internally for handling records; it specifies the
2885 offset in bits modulo 256 to avoid overflow cases.
2887 See the x86-64 PS ABI for details.
2890 static int
2891 classify_argument (enum machine_mode mode, tree type,
2892 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2894 HOST_WIDE_INT bytes =
2895 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2896 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2898 /* Variable sized entities are always passed/returned in memory. */
2899 if (bytes < 0)
2900 return 0;
2902 if (mode != VOIDmode
2903 && targetm.calls.must_pass_in_stack (mode, type))
2904 return 0;
2906 if (type && AGGREGATE_TYPE_P (type))
2908 int i;
2909 tree field;
2910 enum x86_64_reg_class subclasses[MAX_CLASSES];
2912 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2913 if (bytes > 16)
2914 return 0;
2916 for (i = 0; i < words; i++)
2917 classes[i] = X86_64_NO_CLASS;
2919 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2920 signal the memory class, so handle it as a special case. */
2921 if (!words)
2923 classes[0] = X86_64_NO_CLASS;
2924 return 1;
2927 /* Classify each field of record and merge classes. */
2928 switch (TREE_CODE (type))
2930 case RECORD_TYPE:
2931 /* For classes first merge in the field of the subclasses. */
2932 if (TYPE_BINFO (type))
2934 tree binfo, base_binfo;
2935 int basenum;
2937 for (binfo = TYPE_BINFO (type), basenum = 0;
2938 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2940 int num;
2941 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2942 tree type = BINFO_TYPE (base_binfo);
2944 num = classify_argument (TYPE_MODE (type),
2945 type, subclasses,
2946 (offset + bit_offset) % 256);
2947 if (!num)
2948 return 0;
2949 for (i = 0; i < num; i++)
2951 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2952 classes[i + pos] =
2953 merge_classes (subclasses[i], classes[i + pos]);
2957 /* And now merge the fields of structure. */
2958 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2960 if (TREE_CODE (field) == FIELD_DECL)
2962 int num;
2964 /* Bitfields are always classified as integer. Handle them
2965 early, since later code would consider them to be
2966 misaligned integers. */
2967 if (DECL_BIT_FIELD (field))
2969 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2970 i < ((int_bit_position (field) + (bit_offset % 64))
2971 + tree_low_cst (DECL_SIZE (field), 0)
2972 + 63) / 8 / 8; i++)
2973 classes[i] =
2974 merge_classes (X86_64_INTEGER_CLASS,
2975 classes[i]);
2977 else
2979 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2980 TREE_TYPE (field), subclasses,
2981 (int_bit_position (field)
2982 + bit_offset) % 256);
2983 if (!num)
2984 return 0;
2985 for (i = 0; i < num; i++)
2987 int pos =
2988 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2989 classes[i + pos] =
2990 merge_classes (subclasses[i], classes[i + pos]);
2995 break;
2997 case ARRAY_TYPE:
2998 /* Arrays are handled as small records. */
3000 int num;
3001 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3002 TREE_TYPE (type), subclasses, bit_offset);
3003 if (!num)
3004 return 0;
3006 /* The partial classes are now full classes. */
3007 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3008 subclasses[0] = X86_64_SSE_CLASS;
3009 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3010 subclasses[0] = X86_64_INTEGER_CLASS;
3012 for (i = 0; i < words; i++)
3013 classes[i] = subclasses[i % num];
3015 break;
3017 case UNION_TYPE:
3018 case QUAL_UNION_TYPE:
3019 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3022 /* Unions are not derived. */
3023 gcc_assert (!TYPE_BINFO (type)
3024 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3025 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3027 if (TREE_CODE (field) == FIELD_DECL)
3029 int num;
3030 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3031 TREE_TYPE (field), subclasses,
3032 bit_offset);
3033 if (!num)
3034 return 0;
3035 for (i = 0; i < num; i++)
3036 classes[i] = merge_classes (subclasses[i], classes[i]);
3039 break;
3041 default:
3042 gcc_unreachable ();
3045 /* Final merger cleanup. */
3046 for (i = 0; i < words; i++)
3048 /* If one class is MEMORY, everything should be passed in
3049 memory. */
3050 if (classes[i] == X86_64_MEMORY_CLASS)
3051 return 0;
3053 /* The X86_64_SSEUP_CLASS should be always preceded by
3054 X86_64_SSE_CLASS. */
3055 if (classes[i] == X86_64_SSEUP_CLASS
3056 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3057 classes[i] = X86_64_SSE_CLASS;
3059 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3060 if (classes[i] == X86_64_X87UP_CLASS
3061 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3062 classes[i] = X86_64_SSE_CLASS;
3064 return words;
3067 /* Compute the alignment needed. We align all types to their natural
3068 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
3069 if (mode != VOIDmode && mode != BLKmode)
3071 int mode_alignment = GET_MODE_BITSIZE (mode);
3073 if (mode == XFmode)
3074 mode_alignment = 128;
3075 else if (mode == XCmode)
3076 mode_alignment = 256;
3077 if (COMPLEX_MODE_P (mode))
3078 mode_alignment /= 2;
3079 /* Misaligned fields are always returned in memory. */
3080 if (bit_offset % mode_alignment)
3081 return 0;
3084 /* for V1xx modes, just use the base mode */
3085 if (VECTOR_MODE_P (mode)
3086 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3087 mode = GET_MODE_INNER (mode);
3089 /* Classification of atomic types. */
3090 switch (mode)
3092 case SDmode:
3093 case DDmode:
3094 classes[0] = X86_64_SSE_CLASS;
3095 return 1;
3096 case TDmode:
3097 classes[0] = X86_64_SSE_CLASS;
3098 classes[1] = X86_64_SSEUP_CLASS;
3099 return 2;
3100 case DImode:
3101 case SImode:
3102 case HImode:
3103 case QImode:
3104 case CSImode:
3105 case CHImode:
3106 case CQImode:
3107 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3108 classes[0] = X86_64_INTEGERSI_CLASS;
3109 else
3110 classes[0] = X86_64_INTEGER_CLASS;
3111 return 1;
3112 case CDImode:
3113 case TImode:
3114 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3115 return 2;
3116 case CTImode:
3117 return 0;
3118 case SFmode:
3119 if (!(bit_offset % 64))
3120 classes[0] = X86_64_SSESF_CLASS;
3121 else
3122 classes[0] = X86_64_SSE_CLASS;
3123 return 1;
3124 case DFmode:
3125 classes[0] = X86_64_SSEDF_CLASS;
3126 return 1;
3127 case XFmode:
3128 classes[0] = X86_64_X87_CLASS;
3129 classes[1] = X86_64_X87UP_CLASS;
3130 return 2;
3131 case TFmode:
3132 classes[0] = X86_64_SSE_CLASS;
3133 classes[1] = X86_64_SSEUP_CLASS;
3134 return 2;
3135 case SCmode:
3136 classes[0] = X86_64_SSE_CLASS;
3137 return 1;
3138 case DCmode:
3139 classes[0] = X86_64_SSEDF_CLASS;
3140 classes[1] = X86_64_SSEDF_CLASS;
3141 return 2;
3142 case XCmode:
3143 classes[0] = X86_64_COMPLEX_X87_CLASS;
3144 return 1;
3145 case TCmode:
3146 /* This mode is larger than 16 bytes. */
3147 return 0;
3148 case V4SFmode:
3149 case V4SImode:
3150 case V16QImode:
3151 case V8HImode:
3152 case V2DFmode:
3153 case V2DImode:
3154 classes[0] = X86_64_SSE_CLASS;
3155 classes[1] = X86_64_SSEUP_CLASS;
3156 return 2;
3157 case V2SFmode:
3158 case V2SImode:
3159 case V4HImode:
3160 case V8QImode:
3161 classes[0] = X86_64_SSE_CLASS;
3162 return 1;
3163 case BLKmode:
3164 case VOIDmode:
3165 return 0;
3166 default:
3167 gcc_assert (VECTOR_MODE_P (mode));
3169 if (bytes > 16)
3170 return 0;
3172 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3174 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3175 classes[0] = X86_64_INTEGERSI_CLASS;
3176 else
3177 classes[0] = X86_64_INTEGER_CLASS;
3178 classes[1] = X86_64_INTEGER_CLASS;
3179 return 1 + (bytes > 8);
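/* Illustrative example: for a hypothetical 16-byte aggregate such as

       struct pt { double x; int a, b; };

   the code above sees two eightbytes.  The first contains only the
   DFmode field and classifies as X86_64_SSEDF_CLASS; the second holds
   the two SImode fields, which merge to an integer class.  Such a
   struct is therefore passed in one SSE and one integer register
   rather than in memory.  */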
3183 /* Examine the argument and set the number of registers required in each
3184 class. Return 0 iff the parameter should be passed in memory. */
3185 static int
3186 examine_argument (enum machine_mode mode, tree type, int in_return,
3187 int *int_nregs, int *sse_nregs)
3189 enum x86_64_reg_class class[MAX_CLASSES];
3190 int n = classify_argument (mode, type, class, 0);
3192 *int_nregs = 0;
3193 *sse_nregs = 0;
3194 if (!n)
3195 return 0;
3196 for (n--; n >= 0; n--)
3197 switch (class[n])
3199 case X86_64_INTEGER_CLASS:
3200 case X86_64_INTEGERSI_CLASS:
3201 (*int_nregs)++;
3202 break;
3203 case X86_64_SSE_CLASS:
3204 case X86_64_SSESF_CLASS:
3205 case X86_64_SSEDF_CLASS:
3206 (*sse_nregs)++;
3207 break;
3208 case X86_64_NO_CLASS:
3209 case X86_64_SSEUP_CLASS:
3210 break;
3211 case X86_64_X87_CLASS:
3212 case X86_64_X87UP_CLASS:
3213 if (!in_return)
3214 return 0;
3215 break;
3216 case X86_64_COMPLEX_X87_CLASS:
3217 return in_return ? 2 : 0;
3218 case X86_64_MEMORY_CLASS:
3219 gcc_unreachable ();
3221 return 1;
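/* For instance, a TImode (__int128) argument needs two integer
   registers, while an XFmode long double classifies as
   X86_64_X87_CLASS + X86_64_X87UP_CLASS, so examine_argument reports 0
   for it as an argument (it is passed in memory) but accepts it as a
   return value, which ends up in st(0).  */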
3224 /* Construct container for the argument used by GCC interface. See
3225 FUNCTION_ARG for the detailed description. */
3227 static rtx
3228 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3229 tree type, int in_return, int nintregs, int nsseregs,
3230 const int *intreg, int sse_regno)
3232 enum machine_mode tmpmode;
3233 int bytes =
3234 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3235 enum x86_64_reg_class class[MAX_CLASSES];
3236 int n;
3237 int i;
3238 int nexps = 0;
3239 int needed_sseregs, needed_intregs;
3240 rtx exp[MAX_CLASSES];
3241 rtx ret;
3243 n = classify_argument (mode, type, class, 0);
3244 if (TARGET_DEBUG_ARG)
3246 if (!n)
3247 fprintf (stderr, "Memory class\n");
3248 else
3250 fprintf (stderr, "Classes:");
3251 for (i = 0; i < n; i++)
3253 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3255 fprintf (stderr, "\n");
3258 if (!n)
3259 return NULL;
3260 if (!examine_argument (mode, type, in_return, &needed_intregs,
3261 &needed_sseregs))
3262 return NULL;
3263 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3264 return NULL;
3266 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3267 some less clueful developer tries to use floating-point anyway. */
3268 if (needed_sseregs && !TARGET_SSE)
3270 static bool issued_error;
3271 if (!issued_error)
3273 issued_error = true;
3274 if (in_return)
3275 error ("SSE register return with SSE disabled");
3276 else
3277 error ("SSE register argument with SSE disabled");
3279 return NULL;
3282 /* First construct simple cases. Avoid SCmode, since we want to use
3283 a single register to pass this type. */
3284 if (n == 1 && mode != SCmode)
3285 switch (class[0])
3287 case X86_64_INTEGER_CLASS:
3288 case X86_64_INTEGERSI_CLASS:
3289 return gen_rtx_REG (mode, intreg[0]);
3290 case X86_64_SSE_CLASS:
3291 case X86_64_SSESF_CLASS:
3292 case X86_64_SSEDF_CLASS:
3293 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3294 case X86_64_X87_CLASS:
3295 case X86_64_COMPLEX_X87_CLASS:
3296 return gen_rtx_REG (mode, FIRST_STACK_REG);
3297 case X86_64_NO_CLASS:
3298 /* Zero sized array, struct or class. */
3299 return NULL;
3300 default:
3301 gcc_unreachable ();
3303 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3304 && mode != BLKmode)
3305 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3306 if (n == 2
3307 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3308 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3309 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3310 && class[1] == X86_64_INTEGER_CLASS
3311 && (mode == CDImode || mode == TImode || mode == TFmode)
3312 && intreg[0] + 1 == intreg[1])
3313 return gen_rtx_REG (mode, intreg[0]);
3315 /* Otherwise figure out the entries of the PARALLEL. */
3316 for (i = 0; i < n; i++)
3318 switch (class[i])
3320 case X86_64_NO_CLASS:
3321 break;
3322 case X86_64_INTEGER_CLASS:
3323 case X86_64_INTEGERSI_CLASS:
3324 /* Merge TImodes on aligned occasions here too. */
3325 if (i * 8 + 8 > bytes)
3326 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3327 else if (class[i] == X86_64_INTEGERSI_CLASS)
3328 tmpmode = SImode;
3329 else
3330 tmpmode = DImode;
3331 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3332 if (tmpmode == BLKmode)
3333 tmpmode = DImode;
3334 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3335 gen_rtx_REG (tmpmode, *intreg),
3336 GEN_INT (i*8));
3337 intreg++;
3338 break;
3339 case X86_64_SSESF_CLASS:
3340 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3341 gen_rtx_REG (SFmode,
3342 SSE_REGNO (sse_regno)),
3343 GEN_INT (i*8));
3344 sse_regno++;
3345 break;
3346 case X86_64_SSEDF_CLASS:
3347 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3348 gen_rtx_REG (DFmode,
3349 SSE_REGNO (sse_regno)),
3350 GEN_INT (i*8));
3351 sse_regno++;
3352 break;
3353 case X86_64_SSE_CLASS:
3354 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3355 tmpmode = TImode;
3356 else
3357 tmpmode = DImode;
3358 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3359 gen_rtx_REG (tmpmode,
3360 SSE_REGNO (sse_regno)),
3361 GEN_INT (i*8));
3362 if (tmpmode == TImode)
3363 i++;
3364 sse_regno++;
3365 break;
3366 default:
3367 gcc_unreachable ();
3371 /* Empty aligned struct, union or class. */
3372 if (nexps == 0)
3373 return NULL;
3375 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3376 for (i = 0; i < nexps; i++)
3377 XVECEXP (ret, 0, i) = exp [i];
3378 return ret;
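/* Sketch of the result for the struct { double x; int a, b; } example
   above, assuming it is the first argument of a call: the PARALLEL
   built here looks roughly like

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   where each EXPR_LIST pairs the register holding a piece with that
   piece's byte offset inside the argument.  */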
3381 /* Update the data in CUM to advance over an argument
3382 of mode MODE and data type TYPE.
3383 (TYPE is null for libcalls where that information may not be available.) */
3385 void
3386 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3387 tree type, int named)
3389 int bytes =
3390 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3391 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3393 if (type)
3394 mode = type_natural_mode (type);
3396 if (TARGET_DEBUG_ARG)
3397 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3398 "mode=%s, named=%d)\n\n",
3399 words, cum->words, cum->nregs, cum->sse_nregs,
3400 GET_MODE_NAME (mode), named);
3402 if (TARGET_64BIT)
3404 int int_nregs, sse_nregs;
3405 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3406 cum->words += words;
3407 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3409 cum->nregs -= int_nregs;
3410 cum->sse_nregs -= sse_nregs;
3411 cum->regno += int_nregs;
3412 cum->sse_regno += sse_nregs;
3414 else
3415 cum->words += words;
3417 else
3419 switch (mode)
3421 default:
3422 break;
3424 case BLKmode:
3425 if (bytes < 0)
3426 break;
3427 /* FALLTHRU */
3429 case DImode:
3430 case SImode:
3431 case HImode:
3432 case QImode:
3433 cum->words += words;
3434 cum->nregs -= words;
3435 cum->regno += words;
3437 if (cum->nregs <= 0)
3439 cum->nregs = 0;
3440 cum->regno = 0;
3442 break;
3444 case DFmode:
3445 if (cum->float_in_sse < 2)
3446 break;
3447 case SFmode:
3448 if (cum->float_in_sse < 1)
3449 break;
3450 /* FALLTHRU */
3452 case TImode:
3453 case V16QImode:
3454 case V8HImode:
3455 case V4SImode:
3456 case V2DImode:
3457 case V4SFmode:
3458 case V2DFmode:
3459 if (!type || !AGGREGATE_TYPE_P (type))
3461 cum->sse_words += words;
3462 cum->sse_nregs -= 1;
3463 cum->sse_regno += 1;
3464 if (cum->sse_nregs <= 0)
3466 cum->sse_nregs = 0;
3467 cum->sse_regno = 0;
3470 break;
3472 case V8QImode:
3473 case V4HImode:
3474 case V2SImode:
3475 case V2SFmode:
3476 if (!type || !AGGREGATE_TYPE_P (type))
3478 cum->mmx_words += words;
3479 cum->mmx_nregs -= 1;
3480 cum->mmx_regno += 1;
3481 if (cum->mmx_nregs <= 0)
3483 cum->mmx_nregs = 0;
3484 cum->mmx_regno = 0;
3487 break;
3492 /* Define where to put the arguments to a function.
3493 Value is zero to push the argument on the stack,
3494 or a hard register in which to store the argument.
3496 MODE is the argument's machine mode.
3497 TYPE is the data type of the argument (as a tree).
3498 This is null for libcalls where that information may
3499 not be available.
3500 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3501 the preceding args and about the function being called.
3502 NAMED is nonzero if this argument is a named parameter
3503 (otherwise it is an extra parameter matching an ellipsis). */
3506 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3507 tree type, int named)
3509 enum machine_mode mode = orig_mode;
3510 rtx ret = NULL_RTX;
3511 int bytes =
3512 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3513 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3514 static bool warnedsse, warnedmmx;
3516 /* To simplify the code below, represent vector types with a vector mode
3517 even if MMX/SSE are not active. */
3518 if (type && TREE_CODE (type) == VECTOR_TYPE)
3519 mode = type_natural_mode (type);
3521 /* Handle a hidden AL argument containing the number of registers for varargs
3522 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3523 any AL settings. */
3524 if (mode == VOIDmode)
3526 if (TARGET_64BIT)
3527 return GEN_INT (cum->maybe_vaarg
3528 ? (cum->sse_nregs < 0
3529 ? SSE_REGPARM_MAX
3530 : cum->sse_regno)
3531 : -1);
3532 else
3533 return constm1_rtx;
3535 if (TARGET_64BIT)
3536 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3537 cum->sse_nregs,
3538 &x86_64_int_parameter_registers [cum->regno],
3539 cum->sse_regno);
3540 else
3541 switch (mode)
3543 /* For now, pass fp/complex values on the stack. */
3544 default:
3545 break;
3547 case BLKmode:
3548 if (bytes < 0)
3549 break;
3550 /* FALLTHRU */
3551 case DImode:
3552 case SImode:
3553 case HImode:
3554 case QImode:
3555 if (words <= cum->nregs)
3557 int regno = cum->regno;
3559 /* Fastcall allocates the first two DWORD (SImode) or
3560 smaller arguments to ECX and EDX. */
3561 if (cum->fastcall)
3563 if (mode == BLKmode || mode == DImode)
3564 break;
3566 /* ECX, not EAX, is the first allocated register. */
3567 if (regno == 0)
3568 regno = 2;
3570 ret = gen_rtx_REG (mode, regno);
3572 break;
3573 case DFmode:
3574 if (cum->float_in_sse < 2)
3575 break;
3576 case SFmode:
3577 if (cum->float_in_sse < 1)
3578 break;
3579 /* FALLTHRU */
3580 case TImode:
3581 case V16QImode:
3582 case V8HImode:
3583 case V4SImode:
3584 case V2DImode:
3585 case V4SFmode:
3586 case V2DFmode:
3587 if (!type || !AGGREGATE_TYPE_P (type))
3589 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3591 warnedsse = true;
3592 warning (0, "SSE vector argument without SSE enabled "
3593 "changes the ABI");
3595 if (cum->sse_nregs)
3596 ret = gen_reg_or_parallel (mode, orig_mode,
3597 cum->sse_regno + FIRST_SSE_REG);
3599 break;
3600 case V8QImode:
3601 case V4HImode:
3602 case V2SImode:
3603 case V2SFmode:
3604 if (!type || !AGGREGATE_TYPE_P (type))
3606 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3608 warnedmmx = true;
3609 warning (0, "MMX vector argument without MMX enabled "
3610 "changes the ABI");
3612 if (cum->mmx_nregs)
3613 ret = gen_reg_or_parallel (mode, orig_mode,
3614 cum->mmx_regno + FIRST_MMX_REG);
3616 break;
3619 if (TARGET_DEBUG_ARG)
3621 fprintf (stderr,
3622 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3623 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3625 if (ret)
3626 print_simple_rtl (stderr, ret);
3627 else
3628 fprintf (stderr, ", stack");
3630 fprintf (stderr, " )\n");
3633 return ret;
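/* Usage sketch: for a 64-bit call such as f (int i, double d, __m128 v),
   successive FUNCTION_ARG/FUNCTION_ARG_ADVANCE queries hand out %edi
   for i, %xmm0 for d and %xmm1 for v.  On 32-bit targets i and d would
   be pushed on the stack (absent regparm/fastcall or SSE float passing),
   while v still lands in an XMM register when SSE is enabled.  */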
3636 /* A C expression that indicates when an argument must be passed by
3637 reference. If nonzero for an argument, a copy of that argument is
3638 made in memory and a pointer to the argument is passed instead of
3639 the argument itself. The pointer is passed in whatever way is
3640 appropriate for passing a pointer to that type. */
3642 static bool
3643 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3644 enum machine_mode mode ATTRIBUTE_UNUSED,
3645 tree type, bool named ATTRIBUTE_UNUSED)
3647 if (!TARGET_64BIT)
3648 return 0;
3650 if (type && int_size_in_bytes (type) == -1)
3652 if (TARGET_DEBUG_ARG)
3653 fprintf (stderr, "function_arg_pass_by_reference\n");
3654 return 1;
3657 return 0;
3660 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3661 passing ABI. Only called if TARGET_SSE. */
3662 static bool
3663 contains_128bit_aligned_vector_p (tree type)
3665 enum machine_mode mode = TYPE_MODE (type);
3666 if (SSE_REG_MODE_P (mode)
3667 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3668 return true;
3669 if (TYPE_ALIGN (type) < 128)
3670 return false;
3672 if (AGGREGATE_TYPE_P (type))
3674 /* Walk the aggregates recursively. */
3675 switch (TREE_CODE (type))
3677 case RECORD_TYPE:
3678 case UNION_TYPE:
3679 case QUAL_UNION_TYPE:
3681 tree field;
3683 if (TYPE_BINFO (type))
3685 tree binfo, base_binfo;
3686 int i;
3688 for (binfo = TYPE_BINFO (type), i = 0;
3689 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3690 if (contains_128bit_aligned_vector_p
3691 (BINFO_TYPE (base_binfo)))
3692 return true;
3694 /* And now merge the fields of the structure. */
3695 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3697 if (TREE_CODE (field) == FIELD_DECL
3698 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3699 return true;
3701 break;
3704 case ARRAY_TYPE:
3705 /* Just for use if some languages pass arrays by value. */
3706 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3707 return true;
3708 break;
3710 default:
3711 gcc_unreachable ();
3714 return false;
3717 /* Gives the alignment boundary, in bits, of an argument with the
3718 specified mode and type. */
3721 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3723 int align;
3724 if (type)
3725 align = TYPE_ALIGN (type);
3726 else
3727 align = GET_MODE_ALIGNMENT (mode);
3728 if (align < PARM_BOUNDARY)
3729 align = PARM_BOUNDARY;
3730 if (!TARGET_64BIT)
3732 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3733 make an exception for SSE modes since these require 128bit
3734 alignment.
3736 The handling here differs from field_alignment. ICC aligns MMX
3737 arguments to 4 byte boundaries, while structure fields are aligned
3738 to 8 byte boundaries. */
3739 if (!TARGET_SSE)
3740 align = PARM_BOUNDARY;
3741 else if (!type)
3743 if (!SSE_REG_MODE_P (mode))
3744 align = PARM_BOUNDARY;
3746 else
3748 if (!contains_128bit_aligned_vector_p (type))
3749 align = PARM_BOUNDARY;
3752 if (align > 128)
3753 align = 128;
3754 return align;
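/* For example, on a 32-bit target a double argument keeps the default
   PARM_BOUNDARY alignment, whereas an __m128 argument, or a structure
   containing one, is reported as 128-bit aligned so the caller pads
   the outgoing argument area accordingly.  */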
3757 /* Return true if N is a possible register number of function value. */
3758 bool
3759 ix86_function_value_regno_p (int regno)
3761 if (regno == 0
3762 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3763 || (regno == FIRST_SSE_REG && TARGET_SSE))
3764 return true;
3766 if (!TARGET_64BIT
3767 && (regno == FIRST_MMX_REG && TARGET_MMX))
3768 return true;
3770 return false;
3773 /* Define how to find the value returned by a function.
3774 VALTYPE is the data type of the value (as a tree).
3775 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3776 otherwise, FUNC is 0. */
3778 ix86_function_value (tree valtype, tree fntype_or_decl,
3779 bool outgoing ATTRIBUTE_UNUSED)
3781 enum machine_mode natmode = type_natural_mode (valtype);
3783 if (TARGET_64BIT)
3785 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3786 1, REGPARM_MAX, SSE_REGPARM_MAX,
3787 x86_64_int_return_registers, 0);
3788 /* For zero-sized structures, construct_container returns NULL, but we
3789 need to keep the rest of the compiler happy by returning a meaningful value. */
3790 if (!ret)
3791 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3792 return ret;
3794 else
3796 tree fn = NULL_TREE, fntype;
3797 if (fntype_or_decl
3798 && DECL_P (fntype_or_decl))
3799 fn = fntype_or_decl;
3800 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3801 return gen_rtx_REG (TYPE_MODE (valtype),
3802 ix86_value_regno (natmode, fn, fntype));
3806 /* Return true iff type is returned in memory. */
3808 ix86_return_in_memory (tree type)
3810 int needed_intregs, needed_sseregs, size;
3811 enum machine_mode mode = type_natural_mode (type);
3813 if (TARGET_64BIT)
3814 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3816 if (mode == BLKmode)
3817 return 1;
3819 size = int_size_in_bytes (type);
3821 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3822 return 0;
3824 if (VECTOR_MODE_P (mode) || mode == TImode)
3826 /* User-created vectors small enough to fit in EAX. */
3827 if (size < 8)
3828 return 0;
3830 /* MMX/3dNow values are returned in MM0,
3831 except when it doesn't exist. */
3832 if (size == 8)
3833 return (TARGET_MMX ? 0 : 1);
3835 /* SSE values are returned in XMM0, except when it doesn't exist. */
3836 if (size == 16)
3837 return (TARGET_SSE ? 0 : 1);
3840 if (mode == XFmode)
3841 return 0;
3843 if (mode == TDmode)
3844 return 1;
3846 if (size > 12)
3847 return 1;
3848 return 0;
3851 /* When returning SSE vector types, we have a choice of either
3852 (1) being abi incompatible with a -march switch, or
3853 (2) generating an error.
3854 Given no good solution, I think the safest thing is one warning.
3855 The user won't be able to use -Werror, but....
3857 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3858 called in response to actually generating a caller or callee that
3859 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3860 via aggregate_value_p for general type probing from tree-ssa. */
3862 static rtx
3863 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3865 static bool warnedsse, warnedmmx;
3867 if (type)
3869 /* Look at the return type of the function, not the function type. */
3870 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3872 if (!TARGET_SSE && !warnedsse)
3874 if (mode == TImode
3875 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3877 warnedsse = true;
3878 warning (0, "SSE vector return without SSE enabled "
3879 "changes the ABI");
3883 if (!TARGET_MMX && !warnedmmx)
3885 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3887 warnedmmx = true;
3888 warning (0, "MMX vector return without MMX enabled "
3889 "changes the ABI");
3894 return NULL;
3897 /* Define how to find the value returned by a library function
3898 assuming the value has mode MODE. */
3900 ix86_libcall_value (enum machine_mode mode)
3902 if (TARGET_64BIT)
3904 switch (mode)
3906 case SFmode:
3907 case SCmode:
3908 case DFmode:
3909 case DCmode:
3910 case TFmode:
3911 case SDmode:
3912 case DDmode:
3913 case TDmode:
3914 return gen_rtx_REG (mode, FIRST_SSE_REG);
3915 case XFmode:
3916 case XCmode:
3917 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3918 case TCmode:
3919 return NULL;
3920 default:
3921 return gen_rtx_REG (mode, 0);
3924 else
3925 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3928 /* Given a mode, return the register to use for a return value. */
3930 static int
3931 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3933 gcc_assert (!TARGET_64BIT);
3935 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3936 we prevent this case when mmx is not available. */
3937 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3938 return FIRST_MMX_REG;
3940 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3941 we prevent this case when sse is not available. */
3942 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3943 return FIRST_SSE_REG;
3945 /* Decimal floating point values can go in %eax, unlike other float modes. */
3946 if (DECIMAL_FLOAT_MODE_P (mode))
3947 return 0;
3949 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3950 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3951 return 0;
3953 /* Floating point return values in %st(0), except for local functions when
3954 SSE math is enabled or for functions with sseregparm attribute. */
3955 if ((func || fntype)
3956 && (mode == SFmode || mode == DFmode))
3958 int sse_level = ix86_function_sseregparm (fntype, func);
3959 if ((sse_level >= 1 && mode == SFmode)
3960 || (sse_level == 2 && mode == DFmode))
3961 return FIRST_SSE_REG;
3964 return FIRST_FLOAT_REG;
3967 /* Create the va_list data type. */
3969 static tree
3970 ix86_build_builtin_va_list (void)
3972 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3974 /* For i386 we use a plain pointer to the argument area. */
3975 if (!TARGET_64BIT)
3976 return build_pointer_type (char_type_node);
3978 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3979 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3981 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3982 unsigned_type_node);
3983 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3984 unsigned_type_node);
3985 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3986 ptr_type_node);
3987 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3988 ptr_type_node);
3990 va_list_gpr_counter_field = f_gpr;
3991 va_list_fpr_counter_field = f_fpr;
3993 DECL_FIELD_CONTEXT (f_gpr) = record;
3994 DECL_FIELD_CONTEXT (f_fpr) = record;
3995 DECL_FIELD_CONTEXT (f_ovf) = record;
3996 DECL_FIELD_CONTEXT (f_sav) = record;
3998 TREE_CHAIN (record) = type_decl;
3999 TYPE_NAME (record) = type_decl;
4000 TYPE_FIELDS (record) = f_gpr;
4001 TREE_CHAIN (f_gpr) = f_fpr;
4002 TREE_CHAIN (f_fpr) = f_ovf;
4003 TREE_CHAIN (f_ovf) = f_sav;
4005 layout_type (record);
4007 /* The correct type is an array type of one element. */
4008 return build_array_type (record, build_index_type (size_zero_node));
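/* Roughly equivalent C declaration of what is built above for x86-64
   (illustrative only):

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   which matches the layout required by the psABI; for i386 va_list
   stays a plain character pointer.  */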
4011 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4013 static void
4014 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4015 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4016 int no_rtl)
4018 CUMULATIVE_ARGS next_cum;
4019 rtx save_area = NULL_RTX, mem;
4020 rtx label;
4021 rtx label_ref;
4022 rtx tmp_reg;
4023 rtx nsse_reg;
4024 int set;
4025 tree fntype;
4026 int stdarg_p;
4027 int i;
4029 if (!TARGET_64BIT)
4030 return;
4032 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4033 return;
4035 /* Indicate to allocate space on the stack for varargs save area. */
4036 ix86_save_varrargs_registers = 1;
4038 cfun->stack_alignment_needed = 128;
4040 fntype = TREE_TYPE (current_function_decl);
4041 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4042 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4043 != void_type_node));
4045 /* For varargs, we do not want to skip the dummy va_dcl argument.
4046 For stdargs, we do want to skip the last named argument. */
4047 next_cum = *cum;
4048 if (stdarg_p)
4049 function_arg_advance (&next_cum, mode, type, 1);
4051 if (!no_rtl)
4052 save_area = frame_pointer_rtx;
4054 set = get_varargs_alias_set ();
4056 for (i = next_cum.regno;
4057 i < ix86_regparm
4058 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4059 i++)
4061 mem = gen_rtx_MEM (Pmode,
4062 plus_constant (save_area, i * UNITS_PER_WORD));
4063 MEM_NOTRAP_P (mem) = 1;
4064 set_mem_alias_set (mem, set);
4065 emit_move_insn (mem, gen_rtx_REG (Pmode,
4066 x86_64_int_parameter_registers[i]));
4069 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4071 /* Now emit code to save SSE registers. The AX parameter contains the
4072 number of SSE parameter registers used to call this function. We use
4073 the sse_prologue_save insn template, which produces a computed jump across
4074 the SSE saves. We need some preparation work to get this working. */
4076 label = gen_label_rtx ();
4077 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4079 /* Compute address to jump to :
4080 label - 5*eax + nnamed_sse_arguments*5 */
4081 tmp_reg = gen_reg_rtx (Pmode);
4082 nsse_reg = gen_reg_rtx (Pmode);
4083 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4084 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4085 gen_rtx_MULT (Pmode, nsse_reg,
4086 GEN_INT (4))));
4087 if (next_cum.sse_regno)
4088 emit_move_insn
4089 (nsse_reg,
4090 gen_rtx_CONST (DImode,
4091 gen_rtx_PLUS (DImode,
4092 label_ref,
4093 GEN_INT (next_cum.sse_regno * 4))));
4094 else
4095 emit_move_insn (nsse_reg, label_ref);
4096 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4098 /* Compute the address of the memory block we save into. We always use a
4099 pointer pointing 127 bytes after the first byte to store - this is needed
4100 to keep the instruction size limited to 4 bytes (the displacement fits in a signed byte). */
4101 tmp_reg = gen_reg_rtx (Pmode);
4102 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4103 plus_constant (save_area,
4104 8 * REGPARM_MAX + 127)));
4105 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4106 MEM_NOTRAP_P (mem) = 1;
4107 set_mem_alias_set (mem, set);
4108 set_mem_align (mem, BITS_PER_WORD);
4110 /* And finally do the dirty job! */
4111 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4112 GEN_INT (next_cum.sse_regno), label));
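/* Layout sketch of the register save area filled in above: the first
   REGPARM_MAX * 8 bytes hold the integer argument registers
   (%rdi, %rsi, %rdx, %rcx, %r8, %r9), followed by SSE_REGPARM_MAX
   slots of 16 bytes each for %xmm0-%xmm7.  The computed jump produced
   by sse_prologue_save stores only as many SSE registers as the caller
   announced in %al.  */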
4117 /* Implement va_start. */
4119 void
4120 ix86_va_start (tree valist, rtx nextarg)
4122 HOST_WIDE_INT words, n_gpr, n_fpr;
4123 tree f_gpr, f_fpr, f_ovf, f_sav;
4124 tree gpr, fpr, ovf, sav, t;
4126 /* Only 64bit target needs something special. */
4127 if (!TARGET_64BIT)
4129 std_expand_builtin_va_start (valist, nextarg);
4130 return;
4133 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4134 f_fpr = TREE_CHAIN (f_gpr);
4135 f_ovf = TREE_CHAIN (f_fpr);
4136 f_sav = TREE_CHAIN (f_ovf);
4138 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4139 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4140 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4141 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4142 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4144 /* Count number of gp and fp argument registers used. */
4145 words = current_function_args_info.words;
4146 n_gpr = current_function_args_info.regno;
4147 n_fpr = current_function_args_info.sse_regno;
4149 if (TARGET_DEBUG_ARG)
4150 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4151 (int) words, (int) n_gpr, (int) n_fpr);
4153 if (cfun->va_list_gpr_size)
4155 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
4156 build_int_cst (NULL_TREE, n_gpr * 8));
4157 TREE_SIDE_EFFECTS (t) = 1;
4158 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4161 if (cfun->va_list_fpr_size)
4163 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
4164 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
4165 TREE_SIDE_EFFECTS (t) = 1;
4166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4169 /* Find the overflow area. */
4170 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
4171 if (words != 0)
4172 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
4173 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
4174 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4175 TREE_SIDE_EFFECTS (t) = 1;
4176 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4178 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4180 /* Find the register save area.
4181 The function prologue saves it right above the stack frame. */
4182 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
4183 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
4184 TREE_SIDE_EFFECTS (t) = 1;
4185 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
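/* Example of the resulting initialization: for int f (int a, ...) on
   x86-64 the single named integer argument leaves gp_offset = 8 and
   fp_offset = 8 * REGPARM_MAX = 48, so the first integer va_arg reads
   from reg_save_area + 8 and the first floating-point va_arg from
   reg_save_area + 48.  */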
4189 /* Implement va_arg. */
4191 tree
4192 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4194 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4195 tree f_gpr, f_fpr, f_ovf, f_sav;
4196 tree gpr, fpr, ovf, sav, t;
4197 int size, rsize;
4198 tree lab_false, lab_over = NULL_TREE;
4199 tree addr, t2;
4200 rtx container;
4201 int indirect_p = 0;
4202 tree ptrtype;
4203 enum machine_mode nat_mode;
4205 /* Only 64bit target needs something special. */
4206 if (!TARGET_64BIT)
4207 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4209 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4210 f_fpr = TREE_CHAIN (f_gpr);
4211 f_ovf = TREE_CHAIN (f_fpr);
4212 f_sav = TREE_CHAIN (f_ovf);
4214 valist = build_va_arg_indirect_ref (valist);
4215 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4216 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4217 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4218 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4220 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4221 if (indirect_p)
4222 type = build_pointer_type (type);
4223 size = int_size_in_bytes (type);
4224 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4226 nat_mode = type_natural_mode (type);
4227 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4228 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4230 /* Pull the value out of the saved registers. */
4232 addr = create_tmp_var (ptr_type_node, "addr");
4233 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4235 if (container)
4237 int needed_intregs, needed_sseregs;
4238 bool need_temp;
4239 tree int_addr, sse_addr;
4241 lab_false = create_artificial_label ();
4242 lab_over = create_artificial_label ();
4244 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4246 need_temp = (!REG_P (container)
4247 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4248 || TYPE_ALIGN (type) > 128));
4250 /* In case we are passing a structure, verify that it is a consecutive block
4251 in the register save area. If not, we need to do moves. */
4252 if (!need_temp && !REG_P (container))
4254 /* Verify that all registers are strictly consecutive */
4255 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4257 int i;
4259 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4261 rtx slot = XVECEXP (container, 0, i);
4262 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4263 || INTVAL (XEXP (slot, 1)) != i * 16)
4264 need_temp = 1;
4267 else
4269 int i;
4271 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4273 rtx slot = XVECEXP (container, 0, i);
4274 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4275 || INTVAL (XEXP (slot, 1)) != i * 8)
4276 need_temp = 1;
4280 if (!need_temp)
4282 int_addr = addr;
4283 sse_addr = addr;
4285 else
4287 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4288 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4289 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4290 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4293 /* First ensure that we fit completely in registers. */
4294 if (needed_intregs)
4296 t = build_int_cst (TREE_TYPE (gpr),
4297 (REGPARM_MAX - needed_intregs + 1) * 8);
4298 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4299 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4300 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4301 gimplify_and_add (t, pre_p);
4303 if (needed_sseregs)
4305 t = build_int_cst (TREE_TYPE (fpr),
4306 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4307 + REGPARM_MAX * 8);
4308 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4309 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4310 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4311 gimplify_and_add (t, pre_p);
4314 /* Compute index to start of area used for integer regs. */
4315 if (needed_intregs)
4317 /* int_addr = gpr + sav; */
4318 t = fold_convert (ptr_type_node, gpr);
4319 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4320 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4321 gimplify_and_add (t, pre_p);
4323 if (needed_sseregs)
4325 /* sse_addr = fpr + sav; */
4326 t = fold_convert (ptr_type_node, fpr);
4327 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4328 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4329 gimplify_and_add (t, pre_p);
4331 if (need_temp)
4333 int i;
4334 tree temp = create_tmp_var (type, "va_arg_tmp");
4336 /* addr = &temp; */
4337 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4338 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4339 gimplify_and_add (t, pre_p);
4341 for (i = 0; i < XVECLEN (container, 0); i++)
4343 rtx slot = XVECEXP (container, 0, i);
4344 rtx reg = XEXP (slot, 0);
4345 enum machine_mode mode = GET_MODE (reg);
4346 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4347 tree addr_type = build_pointer_type (piece_type);
4348 tree src_addr, src;
4349 int src_offset;
4350 tree dest_addr, dest;
4352 if (SSE_REGNO_P (REGNO (reg)))
4354 src_addr = sse_addr;
4355 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4357 else
4359 src_addr = int_addr;
4360 src_offset = REGNO (reg) * 8;
4362 src_addr = fold_convert (addr_type, src_addr);
4363 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4364 size_int (src_offset)));
4365 src = build_va_arg_indirect_ref (src_addr);
4367 dest_addr = fold_convert (addr_type, addr);
4368 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4369 size_int (INTVAL (XEXP (slot, 1)))));
4370 dest = build_va_arg_indirect_ref (dest_addr);
4372 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4373 gimplify_and_add (t, pre_p);
4377 if (needed_intregs)
4379 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4380 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4381 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4382 gimplify_and_add (t, pre_p);
4384 if (needed_sseregs)
4386 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4387 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4388 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4389 gimplify_and_add (t, pre_p);
4392 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4393 gimplify_and_add (t, pre_p);
4395 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4396 append_to_statement_list (t, pre_p);
4399 /* ... otherwise out of the overflow area. */
4401 /* Care for on-stack alignment if needed. */
4402 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4403 || integer_zerop (TYPE_SIZE (type)))
4404 t = ovf;
4405 else
4407 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4408 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4409 build_int_cst (TREE_TYPE (ovf), align - 1));
4410 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4411 build_int_cst (TREE_TYPE (t), -align));
4413 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4415 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4416 gimplify_and_add (t2, pre_p);
4418 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4419 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4420 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4421 gimplify_and_add (t, pre_p);
4423 if (container)
4425 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4426 append_to_statement_list (t, pre_p);
4429 ptrtype = build_pointer_type (type);
4430 addr = fold_convert (ptrtype, addr);
4432 if (indirect_p)
4433 addr = build_va_arg_indirect_ref (addr);
4434 return build_va_arg_indirect_ref (addr);
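/* Pseudo-code sketch of what the gimplification above produces for
   va_arg (ap, int) on x86-64:

       if (ap->gp_offset >= 6 * 8)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;
  */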
4437 /* Return nonzero if OPNUM's MEM should be matched
4438 in movabs* patterns. */
4441 ix86_check_movabs (rtx insn, int opnum)
4443 rtx set, mem;
4445 set = PATTERN (insn);
4446 if (GET_CODE (set) == PARALLEL)
4447 set = XVECEXP (set, 0, 0);
4448 gcc_assert (GET_CODE (set) == SET);
4449 mem = XEXP (set, opnum);
4450 while (GET_CODE (mem) == SUBREG)
4451 mem = SUBREG_REG (mem);
4452 gcc_assert (GET_CODE (mem) == MEM);
4453 return (volatile_ok || !MEM_VOLATILE_P (mem));
4456 /* Initialize the table of extra 80387 mathematical constants. */
4458 static void
4459 init_ext_80387_constants (void)
4461 static const char * cst[5] =
4463 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4464 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4465 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4466 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4467 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4469 int i;
4471 for (i = 0; i < 5; i++)
4473 real_from_string (&ext_80387_constants_table[i], cst[i]);
4474 /* Ensure each constant is rounded to XFmode precision. */
4475 real_convert (&ext_80387_constants_table[i],
4476 XFmode, &ext_80387_constants_table[i]);
4479 ext_80387_constants_init = 1;
4482 /* Return true if the constant is something that can be loaded with
4483 a special instruction. */
4486 standard_80387_constant_p (rtx x)
4488 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4489 return -1;
4491 if (x == CONST0_RTX (GET_MODE (x)))
4492 return 1;
4493 if (x == CONST1_RTX (GET_MODE (x)))
4494 return 2;
4496 /* For XFmode constants, try to find a special 80387 instruction when
4497 optimizing for size or on those CPUs that benefit from them. */
4498 if (GET_MODE (x) == XFmode
4499 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4501 REAL_VALUE_TYPE r;
4502 int i;
4504 if (! ext_80387_constants_init)
4505 init_ext_80387_constants ();
4507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4508 for (i = 0; i < 5; i++)
4509 if (real_identical (&r, &ext_80387_constants_table[i]))
4510 return i + 3;
4513 return 0;
4516 /* Return the opcode of the special instruction to be used to load
4517 the constant X. */
4519 const char *
4520 standard_80387_constant_opcode (rtx x)
4522 switch (standard_80387_constant_p (x))
4524 case 1:
4525 return "fldz";
4526 case 2:
4527 return "fld1";
4528 case 3:
4529 return "fldlg2";
4530 case 4:
4531 return "fldln2";
4532 case 5:
4533 return "fldl2e";
4534 case 6:
4535 return "fldl2t";
4536 case 7:
4537 return "fldpi";
4538 default:
4539 gcc_unreachable ();
4543 /* Return the CONST_DOUBLE representing the 80387 constant that is
4544 loaded by the specified special instruction. The argument IDX
4545 matches the return value from standard_80387_constant_p. */
4548 standard_80387_constant_rtx (int idx)
4550 int i;
4552 if (! ext_80387_constants_init)
4553 init_ext_80387_constants ();
4555 switch (idx)
4557 case 3:
4558 case 4:
4559 case 5:
4560 case 6:
4561 case 7:
4562 i = idx - 3;
4563 break;
4565 default:
4566 gcc_unreachable ();
4569 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4570 XFmode);
4573 /* Return 1 if X is an FP constant we can load to an SSE register w/o using memory. */
4576 standard_sse_constant_p (rtx x)
4578 if (x == const0_rtx)
4579 return 1;
4580 return (x == CONST0_RTX (GET_MODE (x)));
4583 /* Returns 1 if OP contains a symbol reference */
4586 symbolic_reference_mentioned_p (rtx op)
4588 const char *fmt;
4589 int i;
4591 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4592 return 1;
4594 fmt = GET_RTX_FORMAT (GET_CODE (op));
4595 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4597 if (fmt[i] == 'E')
4599 int j;
4601 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4602 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4603 return 1;
4606 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4607 return 1;
4610 return 0;
4613 /* Return 1 if it is appropriate to emit `ret' instructions in the
4614 body of a function. Do this only if the epilogue is simple, needing a
4615 couple of insns. Prior to reloading, we can't tell how many registers
4616 must be saved, so return 0 then. Return 0 if there is no frame
4617 marker to de-allocate. */
4620 ix86_can_use_return_insn_p (void)
4622 struct ix86_frame frame;
4624 if (! reload_completed || frame_pointer_needed)
4625 return 0;
4627 /* Don't allow popping more than 32k bytes of arguments, since that's all
4628 we can do with one instruction. */
4629 if (current_function_pops_args
4630 && current_function_args_size >= 32768)
4631 return 0;
4633 ix86_compute_frame_layout (&frame);
4634 return frame.to_allocate == 0 && frame.nregs == 0;
4637 /* Value should be nonzero if functions must have frame pointers.
4638 Zero means the frame pointer need not be set up (and parms may
4639 be accessed via the stack pointer) in functions that seem suitable. */
4642 ix86_frame_pointer_required (void)
4644 /* If we accessed previous frames, then the generated code expects
4645 to be able to access the saved ebp value in our frame. */
4646 if (cfun->machine->accesses_prev_frame)
4647 return 1;
4649 /* Several x86 OSes need a frame pointer for other reasons,
4650 usually pertaining to setjmp. */
4651 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4652 return 1;
4654 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4655 the frame pointer by default. Turn it back on now if we've not
4656 got a leaf function. */
4657 if (TARGET_OMIT_LEAF_FRAME_POINTER
4658 && (!current_function_is_leaf
4659 || ix86_current_function_calls_tls_descriptor))
4660 return 1;
4662 if (current_function_profile)
4663 return 1;
4665 return 0;
4668 /* Record that the current function accesses previous call frames. */
4670 void
4671 ix86_setup_frame_addresses (void)
4673 cfun->machine->accesses_prev_frame = 1;
4676 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4677 # define USE_HIDDEN_LINKONCE 1
4678 #else
4679 # define USE_HIDDEN_LINKONCE 0
4680 #endif
4682 static int pic_labels_used;
4684 /* Fills in the label name that should be used for a pc thunk for
4685 the given register. */
4687 static void
4688 get_pc_thunk_name (char name[32], unsigned int regno)
4690 if (USE_HIDDEN_LINKONCE)
4691 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4692 else
4693 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4697 /* This function generates, for each PIC register used, a thunk that loads
4698 that register with the return address of the caller and then returns. */
4700 void
4701 ix86_file_end (void)
4703 rtx xops[2];
4704 int regno;
4706 for (regno = 0; regno < 8; ++regno)
4708 char name[32];
4710 if (! ((pic_labels_used >> regno) & 1))
4711 continue;
4713 get_pc_thunk_name (name, regno);
4715 #if TARGET_MACHO
4716 if (TARGET_MACHO)
4718 switch_to_section (darwin_sections[text_coal_section]);
4719 fputs ("\t.weak_definition\t", asm_out_file);
4720 assemble_name (asm_out_file, name);
4721 fputs ("\n\t.private_extern\t", asm_out_file);
4722 assemble_name (asm_out_file, name);
4723 fputs ("\n", asm_out_file);
4724 ASM_OUTPUT_LABEL (asm_out_file, name);
4726 else
4727 #endif
4728 if (USE_HIDDEN_LINKONCE)
4730 tree decl;
4732 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4733 error_mark_node);
4734 TREE_PUBLIC (decl) = 1;
4735 TREE_STATIC (decl) = 1;
4736 DECL_ONE_ONLY (decl) = 1;
4738 (*targetm.asm_out.unique_section) (decl, 0);
4739 switch_to_section (get_named_section (decl, NULL, 0));
4741 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4742 fputs ("\t.hidden\t", asm_out_file);
4743 assemble_name (asm_out_file, name);
4744 fputc ('\n', asm_out_file);
4745 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4747 else
4749 switch_to_section (text_section);
4750 ASM_OUTPUT_LABEL (asm_out_file, name);
4753 xops[0] = gen_rtx_REG (SImode, regno);
4754 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4755 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4756 output_asm_insn ("ret", xops);
4759 if (NEED_INDICATE_EXEC_STACK)
4760 file_end_indicate_exec_stack ();
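/* The emitted thunk looks like this (shown for %ebx):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the caller's return address - the address of the
   instruction following the call - into the PIC register.  */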
4763 /* Emit code for the SET_GOT patterns. */
4765 const char *
4766 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4768 rtx xops[3];
4770 xops[0] = dest;
4771 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4773 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4775 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4777 if (!flag_pic)
4778 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4779 else
4780 output_asm_insn ("call\t%a2", xops);
4782 #if TARGET_MACHO
4783 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4784 is what will be referenced by the Mach-O PIC subsystem. */
4785 if (!label)
4786 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4787 #endif
4789 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4790 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4792 if (flag_pic)
4793 output_asm_insn ("pop{l}\t%0", xops);
4795 else
4797 char name[32];
4798 get_pc_thunk_name (name, REGNO (dest));
4799 pic_labels_used |= 1 << REGNO (dest);
4801 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4802 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4803 output_asm_insn ("call\t%X2", xops);
4804 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4805 is what will be referenced by the Mach-O PIC subsystem. */
4806 #if TARGET_MACHO
4807 if (!label)
4808 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4809 else
4810 targetm.asm_out.internal_label (asm_out_file, "L",
4811 CODE_LABEL_NUMBER (label));
4812 #endif
4815 if (TARGET_MACHO)
4816 return "";
4818 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4819 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4820 else
4821 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4823 return "";
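/* Typical output: with deep branch prediction the GOT pointer is set
   up as

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   while the fallback form uses an inline call/pop pair:

       call    .L2
   .L2: popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */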
4826 /* Generate a "push" pattern for input ARG. */
4828 static rtx
4829 gen_push (rtx arg)
4831 return gen_rtx_SET (VOIDmode,
4832 gen_rtx_MEM (Pmode,
4833 gen_rtx_PRE_DEC (Pmode,
4834 stack_pointer_rtx)),
4835 arg);
4838 /* Return the number of an unused call-clobbered register available for
4839 the entire function, or INVALID_REGNUM if there is none. */
4841 static unsigned int
4842 ix86_select_alt_pic_regnum (void)
4844 if (current_function_is_leaf && !current_function_profile
4845 && !ix86_current_function_calls_tls_descriptor)
4847 int i;
4848 for (i = 2; i >= 0; --i)
4849 if (!regs_ever_live[i])
4850 return i;
4853 return INVALID_REGNUM;
4856 /* Return 1 if we need to save REGNO. */
4857 static int
4858 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4860 if (pic_offset_table_rtx
4861 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4862 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4863 || current_function_profile
4864 || current_function_calls_eh_return
4865 || current_function_uses_const_pool))
4867 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4868 return 0;
4869 return 1;
4872 if (current_function_calls_eh_return && maybe_eh_return)
4874 unsigned i;
4875 for (i = 0; ; i++)
4877 unsigned test = EH_RETURN_DATA_REGNO (i);
4878 if (test == INVALID_REGNUM)
4879 break;
4880 if (test == regno)
4881 return 1;
4885 if (cfun->machine->force_align_arg_pointer
4886 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4887 return 1;
4889 return (regs_ever_live[regno]
4890 && !call_used_regs[regno]
4891 && !fixed_regs[regno]
4892 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4895 /* Return number of registers to be saved on the stack. */
4897 static int
4898 ix86_nsaved_regs (void)
4900 int nregs = 0;
4901 int regno;
4903 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4904 if (ix86_save_reg (regno, true))
4905 nregs++;
4906 return nregs;
4909 /* Return the offset between two registers, one to be eliminated, and the other
4910 its replacement, at the start of a routine. */
4912 HOST_WIDE_INT
4913 ix86_initial_elimination_offset (int from, int to)
4915 struct ix86_frame frame;
4916 ix86_compute_frame_layout (&frame);
4918 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4919 return frame.hard_frame_pointer_offset;
4920 else if (from == FRAME_POINTER_REGNUM
4921 && to == HARD_FRAME_POINTER_REGNUM)
4922 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4923 else
4925 gcc_assert (to == STACK_POINTER_REGNUM);
4927 if (from == ARG_POINTER_REGNUM)
4928 return frame.stack_pointer_offset;
4930 gcc_assert (from == FRAME_POINTER_REGNUM);
4931 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4935 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4937 static void
4938 ix86_compute_frame_layout (struct ix86_frame *frame)
4940 HOST_WIDE_INT total_size;
4941 unsigned int stack_alignment_needed;
4942 HOST_WIDE_INT offset;
4943 unsigned int preferred_alignment;
4944 HOST_WIDE_INT size = get_frame_size ();
4946 frame->nregs = ix86_nsaved_regs ();
4947 total_size = size;
4949 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4950 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4952 /* During reload iteration the number of registers saved can change.
4953 Recompute the value as needed. Do not recompute when the number of registers
4954 didn't change, as reload does multiple calls to the function and does not
4955 expect the decision to change within a single iteration. */
4956 if (!optimize_size
4957 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4959 int count = frame->nregs;
4961 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4962 /* The fast prologue uses moves instead of pushes to save registers. This
4963 is significantly longer, but also executes faster, as modern hardware
4964 can execute the moves in parallel but can't do that for push/pop.
4966 Be careful about choosing which prologue to emit: when the function takes
4967 many instructions to execute, we may use the slow version, as we may when
4968 the function is known to be outside a hot spot (this is known with profile
4969 feedback only). Weight the size of the function by the number of registers
4970 to save, as it is cheap to use one or two push instructions but very
4971 slow to use many of them. */
4972 if (count)
4973 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4974 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4975 || (flag_branch_probabilities
4976 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4977 cfun->machine->use_fast_prologue_epilogue = false;
4978 else
4979 cfun->machine->use_fast_prologue_epilogue
4980 = !expensive_function_p (count);
4982 if (TARGET_PROLOGUE_USING_MOVE
4983 && cfun->machine->use_fast_prologue_epilogue)
4984 frame->save_regs_using_mov = true;
4985 else
4986 frame->save_regs_using_mov = false;
4989 /* Skip return address and saved base pointer. */
4990 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4992 frame->hard_frame_pointer_offset = offset;
4994 /* Do some sanity checking of stack_alignment_needed and
4995 preferred_alignment, since the i386 port is the only one using these
4996 features, and they may break easily. */
4998 gcc_assert (!size || stack_alignment_needed);
4999 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5000 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5001 gcc_assert (stack_alignment_needed
5002 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5004 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5005 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5007 /* Register save area */
5008 offset += frame->nregs * UNITS_PER_WORD;
5010 /* Va-arg area */
5011 if (ix86_save_varrargs_registers)
5013 offset += X86_64_VARARGS_SIZE;
5014 frame->va_arg_size = X86_64_VARARGS_SIZE;
5016 else
5017 frame->va_arg_size = 0;
5019 /* Align start of frame for local function. */
5020 frame->padding1 = ((offset + stack_alignment_needed - 1)
5021 & -stack_alignment_needed) - offset;
5023 offset += frame->padding1;
5025 /* Frame pointer points here. */
5026 frame->frame_pointer_offset = offset;
5028 offset += size;
5030 /* Add the outgoing arguments area. It can be skipped if we eliminated
5031 all the function calls as dead code.
5032 Skipping is however impossible when the function calls alloca. The alloca
5033 expander assumes that the last current_function_outgoing_args_size bytes
5034 of the stack frame are unused. */
5035 if (ACCUMULATE_OUTGOING_ARGS
5036 && (!current_function_is_leaf || current_function_calls_alloca
5037 || ix86_current_function_calls_tls_descriptor))
5039 offset += current_function_outgoing_args_size;
5040 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5042 else
5043 frame->outgoing_arguments_size = 0;
5045 /* Align stack boundary. Only needed if we're calling another function
5046 or using alloca. */
5047 if (!current_function_is_leaf || current_function_calls_alloca
5048 || ix86_current_function_calls_tls_descriptor)
5049 frame->padding2 = ((offset + preferred_alignment - 1)
5050 & -preferred_alignment) - offset;
5051 else
5052 frame->padding2 = 0;
5054 offset += frame->padding2;
5056 /* We've reached end of stack frame. */
5057 frame->stack_pointer_offset = offset;
5059 /* Size the prologue needs to allocate. */
5060 frame->to_allocate =
5061 (size + frame->padding1 + frame->padding2
5062 + frame->outgoing_arguments_size + frame->va_arg_size);
5064 if ((!frame->to_allocate && frame->nregs <= 1)
5065 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5066 frame->save_regs_using_mov = false;
5068 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5069 && current_function_is_leaf
5070 && !ix86_current_function_calls_tls_descriptor)
5072 frame->red_zone_size = frame->to_allocate;
5073 if (frame->save_regs_using_mov)
5074 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5075 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5076 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5078 else
5079 frame->red_zone_size = 0;
5080 frame->to_allocate -= frame->red_zone_size;
5081 frame->stack_pointer_offset -= frame->red_zone_size;
5082 #if 0
5083 fprintf (stderr, "nregs: %i\n", frame->nregs);
5084 fprintf (stderr, "size: %i\n", size);
5085 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5086 fprintf (stderr, "padding1: %i\n", frame->padding1);
5087 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5088 fprintf (stderr, "padding2: %i\n", frame->padding2);
5089 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5090 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5091 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5092 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5093 frame->hard_frame_pointer_offset);
5094 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5095 #endif
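/* Worked example: on ia32 with a frame pointer, two saved registers
   and 40 bytes of locals in a leaf function, the computation above
   yields hard_frame_pointer_offset = 8 (return address plus saved
   %ebp), frame_pointer_offset = 16 after the two register saves,
   stack_pointer_offset = 56 and to_allocate = 40, the amount the
   prologue actually subtracts from %esp.  */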
5098 /* Emit code to save registers in the prologue. */
5100 static void
5101 ix86_emit_save_regs (void)
5103 unsigned int regno;
5104 rtx insn;
5106 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5107 if (ix86_save_reg (regno, true))
5109 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5110 RTX_FRAME_RELATED_P (insn) = 1;
5114 /* Emit code to save registers using MOV insns. The first register
5115 is stored at POINTER + OFFSET. */
5116 static void
5117 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5119 unsigned int regno;
5120 rtx insn;
5122 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5123 if (ix86_save_reg (regno, true))
5125 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5126 Pmode, offset),
5127 gen_rtx_REG (Pmode, regno));
5128 RTX_FRAME_RELATED_P (insn) = 1;
5129 offset += UNITS_PER_WORD;
5133 /* Expand prologue or epilogue stack adjustment.
5134 The pattern exists to put a dependency on all ebp-based memory accesses.
5135 STYLE should be negative if instructions should be marked as frame related,
5136 zero if %r11 register is live and cannot be freely used and positive
5137 otherwise. */
5139 static void
5140 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5142 rtx insn;
5144 if (! TARGET_64BIT)
5145 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5146 else if (x86_64_immediate_operand (offset, DImode))
5147 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5148 else
5150 rtx r11;
5151 /* r11 is used by indirect sibcall return as well, set before the
5152 epilogue and used after the epilogue. ATM indirect sibcall
5153 shouldn't be used together with huge frame sizes in one
5154 function because of the frame_size check in sibcall.c. */
5155 gcc_assert (style);
5156 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5157 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5158 if (style < 0)
5159 RTX_FRAME_RELATED_P (insn) = 1;
5160 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5161 offset));
5163 if (style < 0)
5164 RTX_FRAME_RELATED_P (insn) = 1;
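/* Editorial note (illustrative): a typical prologue use of the helper above is
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   GEN_INT (-allocate), -1), where the negative STYLE marks the adjustment as
   frame related; the epilogue paths further below pass the caller's STYLE,
   which is 0 for sibcall epilogues (so %r11 must be left alone) and positive
   otherwise.  */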
5167 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5169 static rtx
5170 ix86_internal_arg_pointer (void)
5172 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5173 && DECL_NAME (current_function_decl)
5174 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5175 && DECL_FILE_SCOPE_P (current_function_decl))
5177 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5178 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5180 else
5181 return virtual_incoming_args_rtx;
5184 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5185 This is called from dwarf2out.c to emit call frame instructions
5186 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5187 static void
5188 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5190 rtx unspec = SET_SRC (pattern);
5191 gcc_assert (GET_CODE (unspec) == UNSPEC);
5193 switch (index)
5195 case UNSPEC_REG_SAVE:
5196 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5197 SET_DEST (pattern));
5198 break;
5199 case UNSPEC_DEF_CFA:
5200 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5201 INTVAL (XVECEXP (unspec, 0, 0)));
5202 break;
5203 default:
5204 gcc_unreachable ();
5208 /* Expand the prologue into a bunch of separate insns. */
5210 void
5211 ix86_expand_prologue (void)
5213 rtx insn;
5214 bool pic_reg_used;
5215 struct ix86_frame frame;
5216 HOST_WIDE_INT allocate;
5218 ix86_compute_frame_layout (&frame);
5220 if (cfun->machine->force_align_arg_pointer)
5222 rtx x, y;
5224 /* Grab the argument pointer. */
5225 x = plus_constant (stack_pointer_rtx, 4);
5226 y = cfun->machine->force_align_arg_pointer;
5227 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5228 RTX_FRAME_RELATED_P (insn) = 1;
5230 /* The unwind info consists of two parts: install the fafp as the cfa,
5231 and record the fafp as the "save register" of the stack pointer.
5232 The latter is there so that the unwinder can see where it
5233 should restore the stack pointer across the and insn. */
5234 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5235 x = gen_rtx_SET (VOIDmode, y, x);
5236 RTX_FRAME_RELATED_P (x) = 1;
5237 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5238 UNSPEC_REG_SAVE);
5239 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5240 RTX_FRAME_RELATED_P (y) = 1;
5241 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5242 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5243 REG_NOTES (insn) = x;
5245 /* Align the stack. */
5246 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5247 GEN_INT (-16)));
5249 /* And here we cheat like madmen with the unwind info. We force the
5250 cfa register back to sp+4, which is exactly what it was at the
5251 start of the function. Re-pushing the return address results in
5252 the return address being at the same spot relative to the cfa, and thus is
5253 correct wrt the unwind info. */
5254 x = cfun->machine->force_align_arg_pointer;
5255 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5256 insn = emit_insn (gen_push (x));
5257 RTX_FRAME_RELATED_P (insn) = 1;
5259 x = GEN_INT (4);
5260 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5261 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5262 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5263 REG_NOTES (insn) = x;
5266 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5267 slower on all targets. Also sdb doesn't like it. */
5269 if (frame_pointer_needed)
5271 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5272 RTX_FRAME_RELATED_P (insn) = 1;
5274 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5275 RTX_FRAME_RELATED_P (insn) = 1;
5278 allocate = frame.to_allocate;
5280 if (!frame.save_regs_using_mov)
5281 ix86_emit_save_regs ();
5282 else
5283 allocate += frame.nregs * UNITS_PER_WORD;
5285 /* When using the red zone we may start register saving before allocating
5286 the stack frame, saving one cycle of the prologue. */
5287 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5288 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5289 : stack_pointer_rtx,
5290 -frame.nregs * UNITS_PER_WORD);
5292 if (allocate == 0)
5294 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5295 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5296 GEN_INT (-allocate), -1);
5297 else
5299 /* Only valid for Win32. */
5300 rtx eax = gen_rtx_REG (SImode, 0);
5301 bool eax_live = ix86_eax_live_at_start_p ();
5302 rtx t;
5304 gcc_assert (!TARGET_64BIT);
5306 if (eax_live)
5308 emit_insn (gen_push (eax));
5309 allocate -= 4;
5312 emit_move_insn (eax, GEN_INT (allocate));
5314 insn = emit_insn (gen_allocate_stack_worker (eax));
5315 RTX_FRAME_RELATED_P (insn) = 1;
5316 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5317 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5318 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5319 t, REG_NOTES (insn));
5321 if (eax_live)
5323 if (frame_pointer_needed)
5324 t = plus_constant (hard_frame_pointer_rtx,
5325 allocate
5326 - frame.to_allocate
5327 - frame.nregs * UNITS_PER_WORD);
5328 else
5329 t = plus_constant (stack_pointer_rtx, allocate);
5330 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5334 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5336 if (!frame_pointer_needed || !frame.to_allocate)
5337 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5338 else
5339 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5340 -frame.nregs * UNITS_PER_WORD);
5343 pic_reg_used = false;
5344 if (pic_offset_table_rtx
5345 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5346 || current_function_profile))
5348 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5350 if (alt_pic_reg_used != INVALID_REGNUM)
5351 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5353 pic_reg_used = true;
5356 if (pic_reg_used)
5358 if (TARGET_64BIT)
5359 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5360 else
5361 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5363 /* Even with accurate pre-reload life analysis, we can wind up
5364 deleting all references to the pic register after reload.
5365 Consider if cross-jumping unifies two sides of a branch
5366 controlled by a comparison vs the only read from a global.
5367 In which case, allow the set_got to be deleted, though we're
5368 too late to do anything about the ebx save in the prologue. */
5369 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5372 /* Prevent function calls from being scheduled before the call to mcount.
5373 In the pic_reg_used case, make sure that the got load isn't deleted. */
5374 if (current_function_profile)
5375 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
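/* Editorial note (illustrative sketch of what the expander above might emit for
   a simple 32-bit function with a frame pointer; the exact sequence depends on
   the frame layout and target flags, and the thunk name shown is only an
   assumption for illustration):

        pushl   %ebp
        movl    %esp, %ebp
        subl    $N, %esp                # pro_epilogue_adjust_stack
        pushl   %ebx                    # or mov-based saves when save_regs_using_mov
        call    __i686.get_pc_thunk.bx  # set_got, only when a PIC register is needed
*/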
5378 /* Emit code to restore saved registers using MOV insns. First register
5379 is restored from POINTER + OFFSET. */
5380 static void
5381 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5382 int maybe_eh_return)
5384 int regno;
5385 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5387 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5388 if (ix86_save_reg (regno, maybe_eh_return))
5390 /* Ensure that adjust_address won't be forced to produce a pointer
5391 out of the range allowed by the x86-64 instruction set. */
5392 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5394 rtx r11;
5396 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5397 emit_move_insn (r11, GEN_INT (offset));
5398 emit_insn (gen_adddi3 (r11, r11, pointer));
5399 base_address = gen_rtx_MEM (Pmode, r11);
5400 offset = 0;
5402 emit_move_insn (gen_rtx_REG (Pmode, regno),
5403 adjust_address (base_address, Pmode, offset));
5404 offset += UNITS_PER_WORD;
5408 /* Restore function stack, frame, and registers. */
5410 void
5411 ix86_expand_epilogue (int style)
5413 int regno;
5414 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5415 struct ix86_frame frame;
5416 HOST_WIDE_INT offset;
5418 ix86_compute_frame_layout (&frame);
5420 /* Calculate start of saved registers relative to ebp. Special care
5421 must be taken for the normal return case of a function using
5422 eh_return: the eax and edx registers are marked as saved, but not
5423 restored along this path. */
5424 offset = frame.nregs;
5425 if (current_function_calls_eh_return && style != 2)
5426 offset -= 2;
5427 offset *= -UNITS_PER_WORD;
5429 /* If we're only restoring one register and sp is not valid then
5430 use a move instruction to restore the register, since it's
5431 less work than reloading sp and popping the register.
5433 The default code results in a stack adjustment using an add/lea instruction,
5434 while this code results in a LEAVE instruction (or discrete equivalent),
5435 so it is profitable in some other cases as well, especially when there
5436 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5437 and there is exactly one register to pop. This heuristic may need some
5438 tuning in the future. */
5439 if ((!sp_valid && frame.nregs <= 1)
5440 || (TARGET_EPILOGUE_USING_MOVE
5441 && cfun->machine->use_fast_prologue_epilogue
5442 && (frame.nregs > 1 || frame.to_allocate))
5443 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5444 || (frame_pointer_needed && TARGET_USE_LEAVE
5445 && cfun->machine->use_fast_prologue_epilogue
5446 && frame.nregs == 1)
5447 || current_function_calls_eh_return)
5449 /* Restore registers. We can use ebp or esp to address the memory
5450 locations. If both are available, default to ebp, since offsets
5451 are known to be small. The only exception is esp pointing directly to the
5452 end of the block of saved registers, where we may simplify the addressing
5453 mode. */
5455 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5456 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5457 frame.to_allocate, style == 2);
5458 else
5459 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5460 offset, style == 2);
5462 /* eh_return epilogues need %ecx added to the stack pointer. */
5463 if (style == 2)
5465 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5467 if (frame_pointer_needed)
5469 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5470 tmp = plus_constant (tmp, UNITS_PER_WORD);
5471 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5473 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5474 emit_move_insn (hard_frame_pointer_rtx, tmp);
5476 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5477 const0_rtx, style);
5479 else
5481 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5482 tmp = plus_constant (tmp, (frame.to_allocate
5483 + frame.nregs * UNITS_PER_WORD));
5484 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5487 else if (!frame_pointer_needed)
5488 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5489 GEN_INT (frame.to_allocate
5490 + frame.nregs * UNITS_PER_WORD),
5491 style);
5492 /* If not an i386, mov & pop is faster than "leave". */
5493 else if (TARGET_USE_LEAVE || optimize_size
5494 || !cfun->machine->use_fast_prologue_epilogue)
5495 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5496 else
5498 pro_epilogue_adjust_stack (stack_pointer_rtx,
5499 hard_frame_pointer_rtx,
5500 const0_rtx, style);
5501 if (TARGET_64BIT)
5502 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5503 else
5504 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5507 else
5509 /* First step is to deallocate the stack frame so that we can
5510 pop the registers. */
5511 if (!sp_valid)
5513 gcc_assert (frame_pointer_needed);
5514 pro_epilogue_adjust_stack (stack_pointer_rtx,
5515 hard_frame_pointer_rtx,
5516 GEN_INT (offset), style);
5518 else if (frame.to_allocate)
5519 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5520 GEN_INT (frame.to_allocate), style);
5522 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5523 if (ix86_save_reg (regno, false))
5525 if (TARGET_64BIT)
5526 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5527 else
5528 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5530 if (frame_pointer_needed)
5532 /* Leave results in shorter dependency chains on CPUs that are
5533 able to grok it fast. */
5534 if (TARGET_USE_LEAVE)
5535 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5536 else if (TARGET_64BIT)
5537 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5538 else
5539 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5543 if (cfun->machine->force_align_arg_pointer)
5545 emit_insn (gen_addsi3 (stack_pointer_rtx,
5546 cfun->machine->force_align_arg_pointer,
5547 GEN_INT (-4)));
5550 /* Sibcall epilogues don't want a return instruction. */
5551 if (style == 0)
5552 return;
5554 if (current_function_pops_args && current_function_args_size)
5556 rtx popc = GEN_INT (current_function_pops_args);
5558 /* i386 can only pop 64K bytes. If asked to pop more, pop
5559 return address, do explicit add, and jump indirectly to the
5560 caller. */
5562 if (current_function_pops_args >= 65536)
5564 rtx ecx = gen_rtx_REG (SImode, 2);
5566 /* There is no "pascal" calling convention in 64bit ABI. */
5567 gcc_assert (!TARGET_64BIT);
5569 emit_insn (gen_popsi1 (ecx));
5570 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5571 emit_jump_insn (gen_return_indirect_internal (ecx));
5573 else
5574 emit_jump_insn (gen_return_pop_internal (popc));
5576 else
5577 emit_jump_insn (gen_return_internal ());
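/* Editorial note (illustrative): in the rare case above where a function must
   pop more than 64K bytes of arguments, the emitted sequence is conceptually

        popl    %ecx            # fetch the return address
        addl    $N, %esp        # drop the arguments
        jmp     *%ecx           # return to the caller

   whereas the common case is a single "ret $N" via gen_return_pop_internal.  */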
5580 /* Reset from the function's potential modifications. */
5582 static void
5583 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5584 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5586 if (pic_offset_table_rtx)
5587 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5590 /* Extract the parts of an RTL expression that is a valid memory address
5591 for an instruction. Return 0 if the structure of the address is
5592 grossly off. Return -1 if the address contains ASHIFT, so it is not
5593 strictly valid, but is still used for computing the length of a lea instruction. */
5596 ix86_decompose_address (rtx addr, struct ix86_address *out)
5598 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5599 rtx base_reg, index_reg;
5600 HOST_WIDE_INT scale = 1;
5601 rtx scale_rtx = NULL_RTX;
5602 int retval = 1;
5603 enum ix86_address_seg seg = SEG_DEFAULT;
5605 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5606 base = addr;
5607 else if (GET_CODE (addr) == PLUS)
5609 rtx addends[4], op;
5610 int n = 0, i;
5612 op = addr;
5615 if (n >= 4)
5616 return 0;
5617 addends[n++] = XEXP (op, 1);
5618 op = XEXP (op, 0);
5620 while (GET_CODE (op) == PLUS);
5621 if (n >= 4)
5622 return 0;
5623 addends[n] = op;
5625 for (i = n; i >= 0; --i)
5627 op = addends[i];
5628 switch (GET_CODE (op))
5630 case MULT:
5631 if (index)
5632 return 0;
5633 index = XEXP (op, 0);
5634 scale_rtx = XEXP (op, 1);
5635 break;
5637 case UNSPEC:
5638 if (XINT (op, 1) == UNSPEC_TP
5639 && TARGET_TLS_DIRECT_SEG_REFS
5640 && seg == SEG_DEFAULT)
5641 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5642 else
5643 return 0;
5644 break;
5646 case REG:
5647 case SUBREG:
5648 if (!base)
5649 base = op;
5650 else if (!index)
5651 index = op;
5652 else
5653 return 0;
5654 break;
5656 case CONST:
5657 case CONST_INT:
5658 case SYMBOL_REF:
5659 case LABEL_REF:
5660 if (disp)
5661 return 0;
5662 disp = op;
5663 break;
5665 default:
5666 return 0;
5670 else if (GET_CODE (addr) == MULT)
5672 index = XEXP (addr, 0); /* index*scale */
5673 scale_rtx = XEXP (addr, 1);
5675 else if (GET_CODE (addr) == ASHIFT)
5677 rtx tmp;
5679 /* We're called for lea too, which implements ashift on occasion. */
5680 index = XEXP (addr, 0);
5681 tmp = XEXP (addr, 1);
5682 if (GET_CODE (tmp) != CONST_INT)
5683 return 0;
5684 scale = INTVAL (tmp);
5685 if ((unsigned HOST_WIDE_INT) scale > 3)
5686 return 0;
5687 scale = 1 << scale;
5688 retval = -1;
5690 else
5691 disp = addr; /* displacement */
5693 /* Extract the integral value of scale. */
5694 if (scale_rtx)
5696 if (GET_CODE (scale_rtx) != CONST_INT)
5697 return 0;
5698 scale = INTVAL (scale_rtx);
5701 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5702 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5704 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5705 if (base_reg && index_reg && scale == 1
5706 && (index_reg == arg_pointer_rtx
5707 || index_reg == frame_pointer_rtx
5708 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5710 rtx tmp;
5711 tmp = base, base = index, index = tmp;
5712 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5715 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5716 if ((base_reg == hard_frame_pointer_rtx
5717 || base_reg == frame_pointer_rtx
5718 || base_reg == arg_pointer_rtx) && !disp)
5719 disp = const0_rtx;
5721 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5722 Avoid this by transforming to [%esi+0]. */
5723 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5724 && base_reg && !index_reg && !disp
5725 && REG_P (base_reg)
5726 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5727 disp = const0_rtx;
5729 /* Special case: encode reg+reg instead of reg*2. */
5730 if (!base && index && scale && scale == 2)
5731 base = index, base_reg = index_reg, scale = 1;
5733 /* Special case: scaling cannot be encoded without base or displacement. */
5734 if (!base && !disp && index && scale != 1)
5735 disp = const0_rtx;
5737 out->base = base;
5738 out->index = index;
5739 out->disp = disp;
5740 out->scale = scale;
5741 out->seg = seg;
5743 return retval;
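/* Editorial note (illustrative example for ix86_decompose_address above): the
   canonical address

        (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   fills OUT with base = B, index = A, scale = 4, disp = 8 and returns 1, while
   a bare (ashift (reg A) (const_int 2)) is decomposed with scale = 4 but
   returns -1, since that form is only usable for computing lea lengths.  */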
5746 /* Return cost of the memory address x.
5747 For i386, it is better to use a complex address than let gcc copy
5748 the address into a reg and make a new pseudo. But not if the address
5749 requires two regs - that would mean more pseudos with longer
5750 lifetimes. */
5751 static int
5752 ix86_address_cost (rtx x)
5754 struct ix86_address parts;
5755 int cost = 1;
5756 int ok = ix86_decompose_address (x, &parts);
5758 gcc_assert (ok);
5760 if (parts.base && GET_CODE (parts.base) == SUBREG)
5761 parts.base = SUBREG_REG (parts.base);
5762 if (parts.index && GET_CODE (parts.index) == SUBREG)
5763 parts.index = SUBREG_REG (parts.index);
5765 /* More complex memory references are better. */
5766 if (parts.disp && parts.disp != const0_rtx)
5767 cost--;
5768 if (parts.seg != SEG_DEFAULT)
5769 cost--;
5771 /* Attempt to minimize number of registers in the address. */
5772 if ((parts.base
5773 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5774 || (parts.index
5775 && (!REG_P (parts.index)
5776 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5777 cost++;
5779 if (parts.base
5780 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5781 && parts.index
5782 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5783 && parts.base != parts.index)
5784 cost++;
5786 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5787 since its predecode logic can't detect the length of instructions
5788 and it degenerates to vector decoding. Increase the cost of such
5789 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5790 to split such addresses or even refuse them entirely.
5792 The following addressing modes are affected:
5793 [base+scale*index]
5794 [scale*index+disp]
5795 [base+index]
5797 The first and last cases may be avoidable by explicitly coding the zero in
5798 the memory address, but I don't have an AMD-K6 machine handy to check this
5799 theory. */
5801 if (TARGET_K6
5802 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5803 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5804 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5805 cost += 10;
5807 return cost;
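/* Editorial note (illustrative, with pseudo registers as they appear before
   reload): a bare (symbol_ref "x") costs 0 because the displacement makes the
   reference "more complex", (plus (reg pseudo) (const_int 8)) costs 1, and
   (plus (reg pseudo1) (mult (reg pseudo2) (const_int 4))) costs 3, reflecting
   the preference for addresses that tie up fewer registers.  */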
5810 /* If X is a machine specific address (i.e. a symbol or label being
5811 referenced as a displacement from the GOT implemented using an
5812 UNSPEC), then return the base term. Otherwise return X. */
5815 ix86_find_base_term (rtx x)
5817 rtx term;
5819 if (TARGET_64BIT)
5821 if (GET_CODE (x) != CONST)
5822 return x;
5823 term = XEXP (x, 0);
5824 if (GET_CODE (term) == PLUS
5825 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5826 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5827 term = XEXP (term, 0);
5828 if (GET_CODE (term) != UNSPEC
5829 || XINT (term, 1) != UNSPEC_GOTPCREL)
5830 return x;
5832 term = XVECEXP (term, 0, 0);
5834 if (GET_CODE (term) != SYMBOL_REF
5835 && GET_CODE (term) != LABEL_REF)
5836 return x;
5838 return term;
5841 term = ix86_delegitimize_address (x);
5843 if (GET_CODE (term) != SYMBOL_REF
5844 && GET_CODE (term) != LABEL_REF)
5845 return x;
5847 return term;
5850 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5851 this is used to form addresses to local data when -fPIC is in
5852 use. */
5854 static bool
5855 darwin_local_data_pic (rtx disp)
5857 if (GET_CODE (disp) == MINUS)
5859 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5860 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5861 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5863 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5864 if (! strcmp (sym_name, "<pic base>"))
5865 return true;
5869 return false;
5872 /* Determine if a given RTX is a valid constant. We already know this
5873 satisfies CONSTANT_P. */
5875 bool
5876 legitimate_constant_p (rtx x)
5878 switch (GET_CODE (x))
5880 case CONST:
5881 x = XEXP (x, 0);
5883 if (GET_CODE (x) == PLUS)
5885 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5886 return false;
5887 x = XEXP (x, 0);
5890 if (TARGET_MACHO && darwin_local_data_pic (x))
5891 return true;
5893 /* Only some unspecs are valid as "constants". */
5894 if (GET_CODE (x) == UNSPEC)
5895 switch (XINT (x, 1))
5897 case UNSPEC_GOTOFF:
5898 return TARGET_64BIT;
5899 case UNSPEC_TPOFF:
5900 case UNSPEC_NTPOFF:
5901 x = XVECEXP (x, 0, 0);
5902 return (GET_CODE (x) == SYMBOL_REF
5903 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5904 case UNSPEC_DTPOFF:
5905 x = XVECEXP (x, 0, 0);
5906 return (GET_CODE (x) == SYMBOL_REF
5907 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5908 default:
5909 return false;
5912 /* We must have drilled down to a symbol. */
5913 if (GET_CODE (x) == LABEL_REF)
5914 return true;
5915 if (GET_CODE (x) != SYMBOL_REF)
5916 return false;
5917 /* FALLTHRU */
5919 case SYMBOL_REF:
5920 /* TLS symbols are never valid. */
5921 if (SYMBOL_REF_TLS_MODEL (x))
5922 return false;
5923 break;
5925 default:
5926 break;
5929 /* Otherwise we handle everything else in the move patterns. */
5930 return true;
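/* Editorial note (illustrative): a plain (symbol_ref "foo") with no TLS model
   is accepted above, as is (const (plus (symbol_ref "foo") (const_int 4))),
   while a symbol_ref carrying any TLS model is rejected and is therefore also
   kept out of the constant pool by ix86_cannot_force_const_mem below.  */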
5933 /* Determine if it's legal to put X into the constant pool. This
5934 is not possible for the address of thread-local symbols, which
5935 is checked above. */
5937 static bool
5938 ix86_cannot_force_const_mem (rtx x)
5940 return !legitimate_constant_p (x);
5943 /* Determine if a given RTX is a valid constant address. */
5945 bool
5946 constant_address_p (rtx x)
5948 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5951 /* Nonzero if the constant value X is a legitimate general operand
5952 when generating PIC code. It is given that flag_pic is on and
5953 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5955 bool
5956 legitimate_pic_operand_p (rtx x)
5958 rtx inner;
5960 switch (GET_CODE (x))
5962 case CONST:
5963 inner = XEXP (x, 0);
5964 if (GET_CODE (inner) == PLUS
5965 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5966 inner = XEXP (inner, 0);
5968 /* Only some unspecs are valid as "constants". */
5969 if (GET_CODE (inner) == UNSPEC)
5970 switch (XINT (inner, 1))
5972 case UNSPEC_GOTOFF:
5973 return TARGET_64BIT;
5974 case UNSPEC_TPOFF:
5975 x = XVECEXP (inner, 0, 0);
5976 return (GET_CODE (x) == SYMBOL_REF
5977 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5978 default:
5979 return false;
5981 /* FALLTHRU */
5983 case SYMBOL_REF:
5984 case LABEL_REF:
5985 return legitimate_pic_address_disp_p (x);
5987 default:
5988 return true;
5992 /* Determine if a given CONST RTX is a valid memory displacement
5993 in PIC mode. */
5996 legitimate_pic_address_disp_p (rtx disp)
5998 bool saw_plus;
6000 /* In 64bit mode we can allow direct addresses of symbols and labels
6001 when they are not dynamic symbols. */
6002 if (TARGET_64BIT)
6004 rtx op0 = disp, op1;
6006 switch (GET_CODE (disp))
6008 case LABEL_REF:
6009 return true;
6011 case CONST:
6012 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6013 break;
6014 op0 = XEXP (XEXP (disp, 0), 0);
6015 op1 = XEXP (XEXP (disp, 0), 1);
6016 if (GET_CODE (op1) != CONST_INT
6017 || INTVAL (op1) >= 16*1024*1024
6018 || INTVAL (op1) < -16*1024*1024)
6019 break;
6020 if (GET_CODE (op0) == LABEL_REF)
6021 return true;
6022 if (GET_CODE (op0) != SYMBOL_REF)
6023 break;
6024 /* FALLTHRU */
6026 case SYMBOL_REF:
6027 /* TLS references should always be enclosed in UNSPEC. */
6028 if (SYMBOL_REF_TLS_MODEL (op0))
6029 return false;
6030 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6031 return true;
6032 break;
6034 default:
6035 break;
6038 if (GET_CODE (disp) != CONST)
6039 return 0;
6040 disp = XEXP (disp, 0);
6042 if (TARGET_64BIT)
6044 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
6045 of GOT tables. We should not need these anyway. */
6046 if (GET_CODE (disp) != UNSPEC
6047 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6048 && XINT (disp, 1) != UNSPEC_GOTOFF))
6049 return 0;
6051 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6052 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6053 return 0;
6054 return 1;
6057 saw_plus = false;
6058 if (GET_CODE (disp) == PLUS)
6060 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6061 return 0;
6062 disp = XEXP (disp, 0);
6063 saw_plus = true;
6066 if (TARGET_MACHO && darwin_local_data_pic (disp))
6067 return 1;
6069 if (GET_CODE (disp) != UNSPEC)
6070 return 0;
6072 switch (XINT (disp, 1))
6074 case UNSPEC_GOT:
6075 if (saw_plus)
6076 return false;
6077 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6078 case UNSPEC_GOTOFF:
6079 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6080 While the ABI also specifies a 32bit relocation, we don't produce it in
6081 the small PIC model at all. */
6082 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6083 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6084 && !TARGET_64BIT)
6085 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6086 return false;
6087 case UNSPEC_GOTTPOFF:
6088 case UNSPEC_GOTNTPOFF:
6089 case UNSPEC_INDNTPOFF:
6090 if (saw_plus)
6091 return false;
6092 disp = XVECEXP (disp, 0, 0);
6093 return (GET_CODE (disp) == SYMBOL_REF
6094 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6095 case UNSPEC_NTPOFF:
6096 disp = XVECEXP (disp, 0, 0);
6097 return (GET_CODE (disp) == SYMBOL_REF
6098 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6099 case UNSPEC_DTPOFF:
6100 disp = XVECEXP (disp, 0, 0);
6101 return (GET_CODE (disp) == SYMBOL_REF
6102 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6105 return 0;
6108 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6109 memory address for an instruction. The MODE argument is the machine mode
6110 for the MEM expression that wants to use this address.
6112 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6113 convert common non-canonical forms to canonical form so that they will
6114 be recognized. */
6117 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6119 struct ix86_address parts;
6120 rtx base, index, disp;
6121 HOST_WIDE_INT scale;
6122 const char *reason = NULL;
6123 rtx reason_rtx = NULL_RTX;
6125 if (TARGET_DEBUG_ADDR)
6127 fprintf (stderr,
6128 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6129 GET_MODE_NAME (mode), strict);
6130 debug_rtx (addr);
6133 if (ix86_decompose_address (addr, &parts) <= 0)
6135 reason = "decomposition failed";
6136 goto report_error;
6139 base = parts.base;
6140 index = parts.index;
6141 disp = parts.disp;
6142 scale = parts.scale;
6144 /* Validate base register.
6146 Don't allow SUBREG's that span more than a word here. It can lead to spill
6147 failures when the base is one word out of a two word structure, which is
6148 represented internally as a DImode int. */
6150 if (base)
6152 rtx reg;
6153 reason_rtx = base;
6155 if (REG_P (base))
6156 reg = base;
6157 else if (GET_CODE (base) == SUBREG
6158 && REG_P (SUBREG_REG (base))
6159 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6160 <= UNITS_PER_WORD)
6161 reg = SUBREG_REG (base);
6162 else
6164 reason = "base is not a register";
6165 goto report_error;
6168 if (GET_MODE (base) != Pmode)
6170 reason = "base is not in Pmode";
6171 goto report_error;
6174 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6175 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6177 reason = "base is not valid";
6178 goto report_error;
6182 /* Validate index register.
6184 Don't allow SUBREG's that span more than a word here -- same as above. */
6186 if (index)
6188 rtx reg;
6189 reason_rtx = index;
6191 if (REG_P (index))
6192 reg = index;
6193 else if (GET_CODE (index) == SUBREG
6194 && REG_P (SUBREG_REG (index))
6195 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6196 <= UNITS_PER_WORD)
6197 reg = SUBREG_REG (index);
6198 else
6200 reason = "index is not a register";
6201 goto report_error;
6204 if (GET_MODE (index) != Pmode)
6206 reason = "index is not in Pmode";
6207 goto report_error;
6210 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6211 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6213 reason = "index is not valid";
6214 goto report_error;
6218 /* Validate scale factor. */
6219 if (scale != 1)
6221 reason_rtx = GEN_INT (scale);
6222 if (!index)
6224 reason = "scale without index";
6225 goto report_error;
6228 if (scale != 2 && scale != 4 && scale != 8)
6230 reason = "scale is not a valid multiplier";
6231 goto report_error;
6235 /* Validate displacement. */
6236 if (disp)
6238 reason_rtx = disp;
6240 if (GET_CODE (disp) == CONST
6241 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6242 switch (XINT (XEXP (disp, 0), 1))
6244 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
6245 used. While the ABI also specifies 32bit relocations, we don't produce
6246 them at all and use IP-relative addressing instead. */
6247 case UNSPEC_GOT:
6248 case UNSPEC_GOTOFF:
6249 gcc_assert (flag_pic);
6250 if (!TARGET_64BIT)
6251 goto is_legitimate_pic;
6252 reason = "64bit address unspec";
6253 goto report_error;
6255 case UNSPEC_GOTPCREL:
6256 gcc_assert (flag_pic);
6257 goto is_legitimate_pic;
6259 case UNSPEC_GOTTPOFF:
6260 case UNSPEC_GOTNTPOFF:
6261 case UNSPEC_INDNTPOFF:
6262 case UNSPEC_NTPOFF:
6263 case UNSPEC_DTPOFF:
6264 break;
6266 default:
6267 reason = "invalid address unspec";
6268 goto report_error;
6271 else if (flag_pic && (SYMBOLIC_CONST (disp)
6272 #if TARGET_MACHO
6273 && !machopic_operand_p (disp)
6274 #endif
6277 is_legitimate_pic:
6278 if (TARGET_64BIT && (index || base))
6280 /* foo@dtpoff(%rX) is ok. */
6281 if (GET_CODE (disp) != CONST
6282 || GET_CODE (XEXP (disp, 0)) != PLUS
6283 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6284 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6285 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6286 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6288 reason = "non-constant pic memory reference";
6289 goto report_error;
6292 else if (! legitimate_pic_address_disp_p (disp))
6294 reason = "displacement is an invalid pic construct";
6295 goto report_error;
6298 /* This code used to verify that a symbolic pic displacement
6299 includes the pic_offset_table_rtx register.
6301 While this is a good idea, unfortunately these constructs may
6302 be created by the "adds using lea" optimization for incorrect
6303 code like:
6305 int a;
6306 int foo(int i)
6308 return *(&a+i);
6311 This code is nonsensical, but results in addressing the
6312 GOT table with a pic_offset_table_rtx base. We can't
6313 just refuse it easily, since it gets matched by the
6314 "addsi3" pattern, which later gets split to lea in
6315 case the output register differs from the input. While this
6316 could be handled by a separate addsi pattern for this case
6317 that never results in lea, disabling this test seems to be
6318 the easier and correct fix for the crash. */
6320 else if (GET_CODE (disp) != LABEL_REF
6321 && GET_CODE (disp) != CONST_INT
6322 && (GET_CODE (disp) != CONST
6323 || !legitimate_constant_p (disp))
6324 && (GET_CODE (disp) != SYMBOL_REF
6325 || !legitimate_constant_p (disp)))
6327 reason = "displacement is not constant";
6328 goto report_error;
6330 else if (TARGET_64BIT
6331 && !x86_64_immediate_operand (disp, VOIDmode))
6333 reason = "displacement is out of range";
6334 goto report_error;
6338 /* Everything looks valid. */
6339 if (TARGET_DEBUG_ADDR)
6340 fprintf (stderr, "Success.\n");
6341 return TRUE;
6343 report_error:
6344 if (TARGET_DEBUG_ADDR)
6346 fprintf (stderr, "Error: %s\n", reason);
6347 debug_rtx (reason_rtx);
6349 return FALSE;
6352 /* Return a unique alias set for the GOT. */
6354 static HOST_WIDE_INT
6355 ix86_GOT_alias_set (void)
6357 static HOST_WIDE_INT set = -1;
6358 if (set == -1)
6359 set = new_alias_set ();
6360 return set;
6363 /* Return a legitimate reference for ORIG (an address) using the
6364 register REG. If REG is 0, a new pseudo is generated.
6366 There are two types of references that must be handled:
6368 1. Global data references must load the address from the GOT, via
6369 the PIC reg. An insn is emitted to do this load, and the reg is
6370 returned.
6372 2. Static data references, constant pool addresses, and code labels
6373 compute the address as an offset from the GOT, whose base is in
6374 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6375 differentiate them from global data objects. The returned
6376 address is the PIC reg + an unspec constant.
6378 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6379 reg also appears in the address. */
6381 static rtx
6382 legitimize_pic_address (rtx orig, rtx reg)
6384 rtx addr = orig;
6385 rtx new = orig;
6386 rtx base;
6388 #if TARGET_MACHO
6389 if (reg == 0)
6390 reg = gen_reg_rtx (Pmode);
6391 /* Use the generic Mach-O PIC machinery. */
6392 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6393 #endif
6395 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6396 new = addr;
6397 else if (TARGET_64BIT
6398 && ix86_cmodel != CM_SMALL_PIC
6399 && local_symbolic_operand (addr, Pmode))
6401 rtx tmpreg;
6402 /* This symbol may be referenced via a displacement from the PIC
6403 base address (@GOTOFF). */
6405 if (reload_in_progress)
6406 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6407 if (GET_CODE (addr) == CONST)
6408 addr = XEXP (addr, 0);
6409 if (GET_CODE (addr) == PLUS)
6411 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6412 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6414 else
6415 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6416 new = gen_rtx_CONST (Pmode, new);
6417 if (!reg)
6418 tmpreg = gen_reg_rtx (Pmode);
6419 else
6420 tmpreg = reg;
6421 emit_move_insn (tmpreg, new);
6423 if (reg != 0)
6425 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6426 tmpreg, 1, OPTAB_DIRECT);
6427 new = reg;
6429 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6431 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6433 /* This symbol may be referenced via a displacement from the PIC
6434 base address (@GOTOFF). */
6436 if (reload_in_progress)
6437 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6438 if (GET_CODE (addr) == CONST)
6439 addr = XEXP (addr, 0);
6440 if (GET_CODE (addr) == PLUS)
6442 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6443 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6445 else
6446 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6447 new = gen_rtx_CONST (Pmode, new);
6448 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6450 if (reg != 0)
6452 emit_move_insn (reg, new);
6453 new = reg;
6456 else if (GET_CODE (addr) == SYMBOL_REF)
6458 if (TARGET_64BIT)
6460 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6461 new = gen_rtx_CONST (Pmode, new);
6462 new = gen_const_mem (Pmode, new);
6463 set_mem_alias_set (new, ix86_GOT_alias_set ());
6465 if (reg == 0)
6466 reg = gen_reg_rtx (Pmode);
6467 /* Use gen_movsi directly, otherwise the address is loaded
6468 into a register for CSE. We don't want to CSE these addresses;
6469 instead we CSE addresses from the GOT table, so skip this. */
6470 emit_insn (gen_movsi (reg, new));
6471 new = reg;
6473 else
6475 /* This symbol must be referenced via a load from the
6476 Global Offset Table (@GOT). */
6478 if (reload_in_progress)
6479 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6480 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6481 new = gen_rtx_CONST (Pmode, new);
6482 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6483 new = gen_const_mem (Pmode, new);
6484 set_mem_alias_set (new, ix86_GOT_alias_set ());
6486 if (reg == 0)
6487 reg = gen_reg_rtx (Pmode);
6488 emit_move_insn (reg, new);
6489 new = reg;
6492 else
6494 if (GET_CODE (addr) == CONST_INT
6495 && !x86_64_immediate_operand (addr, VOIDmode))
6497 if (reg)
6499 emit_move_insn (reg, addr);
6500 new = reg;
6502 else
6503 new = force_reg (Pmode, addr);
6505 else if (GET_CODE (addr) == CONST)
6507 addr = XEXP (addr, 0);
6509 /* We must match stuff we generate before. Assume the only
6510 unspecs that can get here are ours. Not that we could do
6511 anything with them anyway.... */
6512 if (GET_CODE (addr) == UNSPEC
6513 || (GET_CODE (addr) == PLUS
6514 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6515 return orig;
6516 gcc_assert (GET_CODE (addr) == PLUS);
6518 if (GET_CODE (addr) == PLUS)
6520 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6522 /* Check first to see if this is a constant offset from a @GOTOFF
6523 symbol reference. */
6524 if (local_symbolic_operand (op0, Pmode)
6525 && GET_CODE (op1) == CONST_INT)
6527 if (!TARGET_64BIT)
6529 if (reload_in_progress)
6530 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6531 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6532 UNSPEC_GOTOFF);
6533 new = gen_rtx_PLUS (Pmode, new, op1);
6534 new = gen_rtx_CONST (Pmode, new);
6535 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6537 if (reg != 0)
6539 emit_move_insn (reg, new);
6540 new = reg;
6543 else
6545 if (INTVAL (op1) < -16*1024*1024
6546 || INTVAL (op1) >= 16*1024*1024)
6548 if (!x86_64_immediate_operand (op1, Pmode))
6549 op1 = force_reg (Pmode, op1);
6550 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6554 else
6556 base = legitimize_pic_address (XEXP (addr, 0), reg);
6557 new = legitimize_pic_address (XEXP (addr, 1),
6558 base == reg ? NULL_RTX : reg);
6560 if (GET_CODE (new) == CONST_INT)
6561 new = plus_constant (base, INTVAL (new));
6562 else
6564 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6566 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6567 new = XEXP (new, 1);
6569 new = gen_rtx_PLUS (Pmode, base, new);
6574 return new;
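/* Editorial note (illustrative summary of the non-Mach-O results produced
   above): a local symbol on ia32 becomes
   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))), a global symbol becomes
   the load (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT)))), and on
   x86-64 a global symbol becomes (mem (const (unspec [sym] UNSPEC_GOTPCREL))),
   i.e. a RIP-relative GOT load.  */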
6577 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6579 static rtx
6580 get_thread_pointer (int to_reg)
6582 rtx tp, reg, insn;
6584 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6585 if (!to_reg)
6586 return tp;
6588 reg = gen_reg_rtx (Pmode);
6589 insn = gen_rtx_SET (VOIDmode, reg, tp);
6590 insn = emit_insn (insn);
6592 return reg;
6595 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6596 false if we expect this to be used for a memory address and true if
6597 we expect to load the address into a register. */
6599 static rtx
6600 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6602 rtx dest, base, off, pic, tp;
6603 int type;
6605 switch (model)
6607 case TLS_MODEL_GLOBAL_DYNAMIC:
6608 dest = gen_reg_rtx (Pmode);
6609 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6611 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6613 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6615 start_sequence ();
6616 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6617 insns = get_insns ();
6618 end_sequence ();
6620 emit_libcall_block (insns, dest, rax, x);
6622 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6623 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6624 else
6625 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6627 if (TARGET_GNU2_TLS)
6629 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6631 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6633 break;
6635 case TLS_MODEL_LOCAL_DYNAMIC:
6636 base = gen_reg_rtx (Pmode);
6637 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6639 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6641 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6643 start_sequence ();
6644 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6645 insns = get_insns ();
6646 end_sequence ();
6648 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6649 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6650 emit_libcall_block (insns, base, rax, note);
6652 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6653 emit_insn (gen_tls_local_dynamic_base_64 (base));
6654 else
6655 emit_insn (gen_tls_local_dynamic_base_32 (base));
6657 if (TARGET_GNU2_TLS)
6659 rtx x = ix86_tls_module_base ();
6661 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
6663 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6666 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6667 off = gen_rtx_CONST (Pmode, off);
6669 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6670 break;
6672 case TLS_MODEL_INITIAL_EXEC:
6673 if (TARGET_64BIT)
6675 pic = NULL;
6676 type = UNSPEC_GOTNTPOFF;
6678 else if (flag_pic)
6680 if (reload_in_progress)
6681 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6682 pic = pic_offset_table_rtx;
6683 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6685 else if (!TARGET_ANY_GNU_TLS)
6687 pic = gen_reg_rtx (Pmode);
6688 emit_insn (gen_set_got (pic));
6689 type = UNSPEC_GOTTPOFF;
6691 else
6693 pic = NULL;
6694 type = UNSPEC_INDNTPOFF;
6697 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6698 off = gen_rtx_CONST (Pmode, off);
6699 if (pic)
6700 off = gen_rtx_PLUS (Pmode, pic, off);
6701 off = gen_const_mem (Pmode, off);
6702 set_mem_alias_set (off, ix86_GOT_alias_set ());
6704 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6706 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6707 off = force_reg (Pmode, off);
6708 return gen_rtx_PLUS (Pmode, base, off);
6710 else
6712 base = get_thread_pointer (true);
6713 dest = gen_reg_rtx (Pmode);
6714 emit_insn (gen_subsi3 (dest, base, off));
6716 break;
6718 case TLS_MODEL_LOCAL_EXEC:
6719 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6720 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6721 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6722 off = gen_rtx_CONST (Pmode, off);
6724 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6726 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6727 return gen_rtx_PLUS (Pmode, base, off);
6729 else
6731 base = get_thread_pointer (true);
6732 dest = gen_reg_rtx (Pmode);
6733 emit_insn (gen_subsi3 (dest, base, off));
6735 break;
6737 default:
6738 gcc_unreachable ();
6741 return dest;
6744 /* Try machine-dependent ways of modifying an illegitimate address
6745 to be legitimate. If we find one, return the new, valid address.
6746 This macro is used in only one place: `memory_address' in explow.c.
6748 OLDX is the address as it was before break_out_memory_refs was called.
6749 In some cases it is useful to look at this to decide what needs to be done.
6751 MODE and WIN are passed so that this macro can use
6752 GO_IF_LEGITIMATE_ADDRESS.
6754 It is always safe for this macro to do nothing. It exists to recognize
6755 opportunities to optimize the output.
6757 For the 80386, we handle X+REG by loading X into a register R and
6758 using R+REG. R will go in a general reg and indexing will be used.
6759 However, if REG is a broken-out memory address or multiplication,
6760 nothing needs to be done because REG can certainly go in a general reg.
6762 When -fpic is used, special handling is needed for symbolic references.
6763 See comments by legitimize_pic_address in i386.c for details. */
6766 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6768 int changed = 0;
6769 unsigned log;
6771 if (TARGET_DEBUG_ADDR)
6773 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6774 GET_MODE_NAME (mode));
6775 debug_rtx (x);
6778 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6779 if (log)
6780 return legitimize_tls_address (x, log, false);
6781 if (GET_CODE (x) == CONST
6782 && GET_CODE (XEXP (x, 0)) == PLUS
6783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6784 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6786 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6787 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6790 if (flag_pic && SYMBOLIC_CONST (x))
6791 return legitimize_pic_address (x, 0);
6793 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6794 if (GET_CODE (x) == ASHIFT
6795 && GET_CODE (XEXP (x, 1)) == CONST_INT
6796 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6798 changed = 1;
6799 log = INTVAL (XEXP (x, 1));
6800 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6801 GEN_INT (1 << log));
6804 if (GET_CODE (x) == PLUS)
6806 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6808 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6809 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6810 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6812 changed = 1;
6813 log = INTVAL (XEXP (XEXP (x, 0), 1));
6814 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6815 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6816 GEN_INT (1 << log));
6819 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6820 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6821 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6823 changed = 1;
6824 log = INTVAL (XEXP (XEXP (x, 1), 1));
6825 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6826 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6827 GEN_INT (1 << log));
6830 /* Put multiply first if it isn't already. */
6831 if (GET_CODE (XEXP (x, 1)) == MULT)
6833 rtx tmp = XEXP (x, 0);
6834 XEXP (x, 0) = XEXP (x, 1);
6835 XEXP (x, 1) = tmp;
6836 changed = 1;
6839 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6840 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6841 created by virtual register instantiation, register elimination, and
6842 similar optimizations. */
6843 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6845 changed = 1;
6846 x = gen_rtx_PLUS (Pmode,
6847 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6848 XEXP (XEXP (x, 1), 0)),
6849 XEXP (XEXP (x, 1), 1));
6852 /* Canonicalize
6853 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6854 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6855 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6856 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6857 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6858 && CONSTANT_P (XEXP (x, 1)))
6860 rtx constant;
6861 rtx other = NULL_RTX;
6863 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6865 constant = XEXP (x, 1);
6866 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6868 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6870 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6871 other = XEXP (x, 1);
6873 else
6874 constant = 0;
6876 if (constant)
6878 changed = 1;
6879 x = gen_rtx_PLUS (Pmode,
6880 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6881 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6882 plus_constant (other, INTVAL (constant)));
6886 if (changed && legitimate_address_p (mode, x, FALSE))
6887 return x;
6889 if (GET_CODE (XEXP (x, 0)) == MULT)
6891 changed = 1;
6892 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6895 if (GET_CODE (XEXP (x, 1)) == MULT)
6897 changed = 1;
6898 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6901 if (changed
6902 && GET_CODE (XEXP (x, 1)) == REG
6903 && GET_CODE (XEXP (x, 0)) == REG)
6904 return x;
6906 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6908 changed = 1;
6909 x = legitimize_pic_address (x, 0);
6912 if (changed && legitimate_address_p (mode, x, FALSE))
6913 return x;
6915 if (GET_CODE (XEXP (x, 0)) == REG)
6917 rtx temp = gen_reg_rtx (Pmode);
6918 rtx val = force_operand (XEXP (x, 1), temp);
6919 if (val != temp)
6920 emit_move_insn (temp, val);
6922 XEXP (x, 1) = temp;
6923 return x;
6926 else if (GET_CODE (XEXP (x, 1)) == REG)
6928 rtx temp = gen_reg_rtx (Pmode);
6929 rtx val = force_operand (XEXP (x, 0), temp);
6930 if (val != temp)
6931 emit_move_insn (temp, val);
6933 XEXP (x, 0) = temp;
6934 return x;
6938 return x;
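/* Editorial note (illustrative): the canonicalizations above turn, e.g.,
   (plus (reg A) (ashift (reg B) (const_int 2))) into
   (plus (mult (reg B) (const_int 4)) (reg A)) - the shift becomes a multiply
   and the multiply is moved to the first operand - which
   ix86_decompose_address then accepts as base, index and scale.  */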
6941 /* Print an integer constant expression in assembler syntax. Addition
6942 and subtraction are the only arithmetic that may appear in these
6943 expressions. FILE is the stdio stream to write to, X is the rtx, and
6944 CODE is the operand print code from the output string. */
6946 static void
6947 output_pic_addr_const (FILE *file, rtx x, int code)
6949 char buf[256];
6951 switch (GET_CODE (x))
6953 case PC:
6954 gcc_assert (flag_pic);
6955 putc ('.', file);
6956 break;
6958 case SYMBOL_REF:
6959 output_addr_const (file, x);
6960 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6961 fputs ("@PLT", file);
6962 break;
6964 case LABEL_REF:
6965 x = XEXP (x, 0);
6966 /* FALLTHRU */
6967 case CODE_LABEL:
6968 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6969 assemble_name (asm_out_file, buf);
6970 break;
6972 case CONST_INT:
6973 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6974 break;
6976 case CONST:
6977 /* This used to output parentheses around the expression,
6978 but that does not work on the 386 (either ATT or BSD assembler). */
6979 output_pic_addr_const (file, XEXP (x, 0), code);
6980 break;
6982 case CONST_DOUBLE:
6983 if (GET_MODE (x) == VOIDmode)
6985 /* We can use %d if the number is <32 bits and positive. */
6986 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6987 fprintf (file, "0x%lx%08lx",
6988 (unsigned long) CONST_DOUBLE_HIGH (x),
6989 (unsigned long) CONST_DOUBLE_LOW (x));
6990 else
6991 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6993 else
6994 /* We can't handle floating point constants;
6995 PRINT_OPERAND must handle them. */
6996 output_operand_lossage ("floating constant misused");
6997 break;
6999 case PLUS:
7000 /* Some assemblers need integer constants to appear first. */
7001 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7003 output_pic_addr_const (file, XEXP (x, 0), code);
7004 putc ('+', file);
7005 output_pic_addr_const (file, XEXP (x, 1), code);
7007 else
7009 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7010 output_pic_addr_const (file, XEXP (x, 1), code);
7011 putc ('+', file);
7012 output_pic_addr_const (file, XEXP (x, 0), code);
7014 break;
7016 case MINUS:
7017 if (!TARGET_MACHO)
7018 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7019 output_pic_addr_const (file, XEXP (x, 0), code);
7020 putc ('-', file);
7021 output_pic_addr_const (file, XEXP (x, 1), code);
7022 if (!TARGET_MACHO)
7023 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7024 break;
7026 case UNSPEC:
7027 gcc_assert (XVECLEN (x, 0) == 1);
7028 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7029 switch (XINT (x, 1))
7031 case UNSPEC_GOT:
7032 fputs ("@GOT", file);
7033 break;
7034 case UNSPEC_GOTOFF:
7035 fputs ("@GOTOFF", file);
7036 break;
7037 case UNSPEC_GOTPCREL:
7038 fputs ("@GOTPCREL(%rip)", file);
7039 break;
7040 case UNSPEC_GOTTPOFF:
7041 /* FIXME: This might be @TPOFF in Sun ld too. */
7042 fputs ("@GOTTPOFF", file);
7043 break;
7044 case UNSPEC_TPOFF:
7045 fputs ("@TPOFF", file);
7046 break;
7047 case UNSPEC_NTPOFF:
7048 if (TARGET_64BIT)
7049 fputs ("@TPOFF", file);
7050 else
7051 fputs ("@NTPOFF", file);
7052 break;
7053 case UNSPEC_DTPOFF:
7054 fputs ("@DTPOFF", file);
7055 break;
7056 case UNSPEC_GOTNTPOFF:
7057 if (TARGET_64BIT)
7058 fputs ("@GOTTPOFF(%rip)", file);
7059 else
7060 fputs ("@GOTNTPOFF", file);
7061 break;
7062 case UNSPEC_INDNTPOFF:
7063 fputs ("@INDNTPOFF", file);
7064 break;
7065 default:
7066 output_operand_lossage ("invalid UNSPEC as operand");
7067 break;
7069 break;
7071 default:
7072 output_operand_lossage ("invalid expression as operand");
7076 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7077 We need to emit DTP-relative relocations. */
7079 static void
7080 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7082 fputs (ASM_LONG, file);
7083 output_addr_const (file, x);
7084 fputs ("@DTPOFF", file);
7085 switch (size)
7087 case 4:
7088 break;
7089 case 8:
7090 fputs (", 0", file);
7091 break;
7092 default:
7093 gcc_unreachable ();
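/* Editorial note (illustrative, assuming ASM_LONG expands to a ".long"
   directive): for SIZE == 4 the routine above emits ".long sym@DTPOFF", and
   for SIZE == 8 it emits ".long sym@DTPOFF, 0" so that the value still
   occupies eight bytes.  */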
7097 /* In the name of slightly smaller debug output, and to cater to
7098 general assembler lossage, recognize PIC+GOTOFF and turn it back
7099 into a direct symbol reference.
7101 On Darwin, this is necessary to avoid a crash, because Darwin
7102 has a different PIC label for each routine but the DWARF debugging
7103 information is not associated with any particular routine, so it's
7104 necessary to remove references to the PIC label from RTL stored by
7105 the DWARF output code. */
7107 static rtx
7108 ix86_delegitimize_address (rtx orig_x)
7110 rtx x = orig_x;
7111 /* reg_addend is NULL or a multiple of some register. */
7112 rtx reg_addend = NULL_RTX;
7113 /* const_addend is NULL or a const_int. */
7114 rtx const_addend = NULL_RTX;
7115 /* This is the result, or NULL. */
7116 rtx result = NULL_RTX;
7118 if (GET_CODE (x) == MEM)
7119 x = XEXP (x, 0);
7121 if (TARGET_64BIT)
7123 if (GET_CODE (x) != CONST
7124 || GET_CODE (XEXP (x, 0)) != UNSPEC
7125 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7126 || GET_CODE (orig_x) != MEM)
7127 return orig_x;
7128 return XVECEXP (XEXP (x, 0), 0, 0);
7131 if (GET_CODE (x) != PLUS
7132 || GET_CODE (XEXP (x, 1)) != CONST)
7133 return orig_x;
7135 if (GET_CODE (XEXP (x, 0)) == REG
7136 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7137 /* %ebx + GOT/GOTOFF */
7139 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7141 /* %ebx + %reg * scale + GOT/GOTOFF */
7142 reg_addend = XEXP (x, 0);
7143 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7144 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7145 reg_addend = XEXP (reg_addend, 1);
7146 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7147 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7148 reg_addend = XEXP (reg_addend, 0);
7149 else
7150 return orig_x;
7151 if (GET_CODE (reg_addend) != REG
7152 && GET_CODE (reg_addend) != MULT
7153 && GET_CODE (reg_addend) != ASHIFT)
7154 return orig_x;
7156 else
7157 return orig_x;
7159 x = XEXP (XEXP (x, 1), 0);
7160 if (GET_CODE (x) == PLUS
7161 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7163 const_addend = XEXP (x, 1);
7164 x = XEXP (x, 0);
7167 if (GET_CODE (x) == UNSPEC
7168 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7169 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7170 result = XVECEXP (x, 0, 0);
7172 if (TARGET_MACHO && darwin_local_data_pic (x)
7173 && GET_CODE (orig_x) != MEM)
7174 result = XEXP (x, 0);
7176 if (! result)
7177 return orig_x;
7179 if (const_addend)
7180 result = gen_rtx_PLUS (Pmode, result, const_addend);
7181 if (reg_addend)
7182 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7183 return result;
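/* Editorial note (illustrative): on ia32 the routine above turns, e.g.,
   (mem (plus (reg %ebx) (const (unspec [sym] UNSPEC_GOT)))) back into sym, and
   a pic-register-plus-GOTOFF sum used as a plain value likewise collapses to
   the symbol, with any constant or register addend that was part of the
   original address re-added to the result.  */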
7186 static void
7187 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7188 int fp, FILE *file)
7190 const char *suffix;
7192 if (mode == CCFPmode || mode == CCFPUmode)
7194 enum rtx_code second_code, bypass_code;
7195 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7196 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7197 code = ix86_fp_compare_code_to_integer (code);
7198 mode = CCmode;
7200 if (reverse)
7201 code = reverse_condition (code);
7203 switch (code)
7205 case EQ:
7206 suffix = "e";
7207 break;
7208 case NE:
7209 suffix = "ne";
7210 break;
7211 case GT:
7212 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7213 suffix = "g";
7214 break;
7215 case GTU:
7216 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7217 Those same assemblers have the same but opposite lossage on cmov. */
7218 gcc_assert (mode == CCmode);
7219 suffix = fp ? "nbe" : "a";
7220 break;
7221 case LT:
7222 switch (mode)
7224 case CCNOmode:
7225 case CCGOCmode:
7226 suffix = "s";
7227 break;
7229 case CCmode:
7230 case CCGCmode:
7231 suffix = "l";
7232 break;
7234 default:
7235 gcc_unreachable ();
7237 break;
7238 case LTU:
7239 gcc_assert (mode == CCmode);
7240 suffix = "b";
7241 break;
7242 case GE:
7243 switch (mode)
7245 case CCNOmode:
7246 case CCGOCmode:
7247 suffix = "ns";
7248 break;
7250 case CCmode:
7251 case CCGCmode:
7252 suffix = "ge";
7253 break;
7255 default:
7256 gcc_unreachable ();
7258 break;
7259 case GEU:
7260 /* ??? As above. */
7261 gcc_assert (mode == CCmode);
7262 suffix = fp ? "nb" : "ae";
7263 break;
7264 case LE:
7265 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7266 suffix = "le";
7267 break;
7268 case LEU:
7269 gcc_assert (mode == CCmode);
7270 suffix = "be";
7271 break;
7272 case UNORDERED:
7273 suffix = fp ? "u" : "p";
7274 break;
7275 case ORDERED:
7276 suffix = fp ? "nu" : "np";
7277 break;
7278 default:
7279 gcc_unreachable ();
7281 fputs (suffix, file);
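/* Illustrative note (added): for (GT, CCGCmode) the suffix is "g", so an insn
   template along the lines of "set%C1\t%0" would come out as "setg %al"
   (assuming operand 0 is the AL register); with REVERSE set the same operands
   would yield "setle %al".  */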
7284 /* Print the name of register X to FILE based on its machine mode and number.
7285 If CODE is 'w', pretend the mode is HImode.
7286 If CODE is 'b', pretend the mode is QImode.
7287 If CODE is 'k', pretend the mode is SImode.
7288 If CODE is 'q', pretend the mode is DImode.
7289 If CODE is 'h', pretend the reg is the 'high' byte register.
7290 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7292 void
7293 print_reg (rtx x, int code, FILE *file)
7295 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7296 && REGNO (x) != FRAME_POINTER_REGNUM
7297 && REGNO (x) != FLAGS_REG
7298 && REGNO (x) != FPSR_REG);
7300 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7301 putc ('%', file);
7303 if (code == 'w' || MMX_REG_P (x))
7304 code = 2;
7305 else if (code == 'b')
7306 code = 1;
7307 else if (code == 'k')
7308 code = 4;
7309 else if (code == 'q')
7310 code = 8;
7311 else if (code == 'y')
7312 code = 3;
7313 else if (code == 'h')
7314 code = 0;
7315 else
7316 code = GET_MODE_SIZE (GET_MODE (x));
7318 /* Irritatingly, AMD extended registers use a different naming convention
7319 from the normal registers. */
7320 if (REX_INT_REG_P (x))
7322 gcc_assert (TARGET_64BIT);
7323 switch (code)
7325 case 0:
7326 error ("extended registers have no high halves");
7327 break;
7328 case 1:
7329 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7330 break;
7331 case 2:
7332 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7333 break;
7334 case 4:
7335 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7336 break;
7337 case 8:
7338 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7339 break;
7340 default:
7341 error ("unsupported operand size for extended register");
7342 break;
7344 return;
7346 switch (code)
7348 case 3:
7349 if (STACK_TOP_P (x))
7351 fputs ("st(0)", file);
7352 break;
7354 /* FALLTHRU */
7355 case 8:
7356 case 4:
7357 case 12:
7358 if (! ANY_FP_REG_P (x))
7359 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7360 /* FALLTHRU */
7361 case 16:
7362 case 2:
7363 normal:
7364 fputs (hi_reg_name[REGNO (x)], file);
7365 break;
7366 case 1:
7367 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7368 goto normal;
7369 fputs (qi_reg_name[REGNO (x)], file);
7370 break;
7371 case 0:
7372 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7373 goto normal;
7374 fputs (qi_high_reg_name[REGNO (x)], file);
7375 break;
7376 default:
7377 gcc_unreachable ();
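/* Examples (added for illustration): for the AX register, code 'w' prints
   "%ax", 'b' prints "%al", 'h' prints "%ah" and 'q' prints "%rax" (64-bit);
   an extended register such as r8 printed with code 'k' comes out as "%r8d".
   The '%' prefix is dropped for Intel syntax when USER_LABEL_PREFIX is
   non-empty.  */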
7381 /* Locate some local-dynamic symbol still in use by this function
7382 so that we can print its name in some tls_local_dynamic_base
7383 pattern. */
7385 static const char *
7386 get_some_local_dynamic_name (void)
7388 rtx insn;
7390 if (cfun->machine->some_ld_name)
7391 return cfun->machine->some_ld_name;
7393 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7394 if (INSN_P (insn)
7395 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7396 return cfun->machine->some_ld_name;
7398 gcc_unreachable ();
7401 static int
7402 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7404 rtx x = *px;
7406 if (GET_CODE (x) == SYMBOL_REF
7407 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7409 cfun->machine->some_ld_name = XSTR (x, 0);
7410 return 1;
7413 return 0;
7416 /* Meaning of CODE:
7417 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7418 C -- print opcode suffix for set/cmov insn.
7419 c -- like C, but print reversed condition
7420 F,f -- likewise, but for floating-point.
7421 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7422 otherwise nothing
7423 R -- print the prefix for register names.
7424 z -- print the opcode suffix for the size of the current operand.
7425 * -- print a star (in certain assembler syntax)
7426 A -- print an absolute memory reference.
7427 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7428 s -- print a shift double count, followed by the assembler's argument
7429 delimiter.
7430 b -- print the QImode name of the register for the indicated operand.
7431 %b0 would print %al if operands[0] is reg 0.
7432 w -- likewise, print the HImode name of the register.
7433 k -- likewise, print the SImode name of the register.
7434 q -- likewise, print the DImode name of the register.
7435 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7436 y -- print "st(0)" instead of "st" as a register.
7437 D -- print condition for SSE cmp instruction.
7438 P -- if PIC, print an @PLT suffix.
7439 X -- don't print any sort of PIC '@' suffix for a symbol.
7440 & -- print some in-use local-dynamic symbol name.
7441 H -- print a memory address offset by 8; used for sse high-parts
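/* Worked example (an added note, not part of the original file): if
   operands[0] is (reg:QI 0) then "%b0" in a template prints "%al", "%k0"
   prints "%eax" and "%h0" prints "%ah"; "%C1" applied to a comparison operand
   prints the setcc/cmov suffix chosen by put_condition_code above.  */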
7444 void
7445 print_operand (FILE *file, rtx x, int code)
7447 if (code)
7449 switch (code)
7451 case '*':
7452 if (ASSEMBLER_DIALECT == ASM_ATT)
7453 putc ('*', file);
7454 return;
7456 case '&':
7457 assemble_name (file, get_some_local_dynamic_name ());
7458 return;
7460 case 'A':
7461 switch (ASSEMBLER_DIALECT)
7463 case ASM_ATT:
7464 putc ('*', file);
7465 break;
7467 case ASM_INTEL:
7468 /* Intel syntax. For absolute addresses, registers should not
7469 be surrounded by braces. */
7470 if (GET_CODE (x) != REG)
7472 putc ('[', file);
7473 PRINT_OPERAND (file, x, 0);
7474 putc (']', file);
7475 return;
7477 break;
7479 default:
7480 gcc_unreachable ();
7483 PRINT_OPERAND (file, x, 0);
7484 return;
7487 case 'L':
7488 if (ASSEMBLER_DIALECT == ASM_ATT)
7489 putc ('l', file);
7490 return;
7492 case 'W':
7493 if (ASSEMBLER_DIALECT == ASM_ATT)
7494 putc ('w', file);
7495 return;
7497 case 'B':
7498 if (ASSEMBLER_DIALECT == ASM_ATT)
7499 putc ('b', file);
7500 return;
7502 case 'Q':
7503 if (ASSEMBLER_DIALECT == ASM_ATT)
7504 putc ('l', file);
7505 return;
7507 case 'S':
7508 if (ASSEMBLER_DIALECT == ASM_ATT)
7509 putc ('s', file);
7510 return;
7512 case 'T':
7513 if (ASSEMBLER_DIALECT == ASM_ATT)
7514 putc ('t', file);
7515 return;
7517 case 'z':
7518 /* 387 opcodes don't get size suffixes if the operands are
7519 registers. */
7520 if (STACK_REG_P (x))
7521 return;
7523 /* Likewise if using Intel opcodes. */
7524 if (ASSEMBLER_DIALECT == ASM_INTEL)
7525 return;
7527 /* This selects the opcode size suffix from the size of the operand. */
7528 switch (GET_MODE_SIZE (GET_MODE (x)))
7530 case 2:
7531 #ifdef HAVE_GAS_FILDS_FISTS
7532 putc ('s', file);
7533 #endif
7534 return;
7536 case 4:
7537 if (GET_MODE (x) == SFmode)
7539 putc ('s', file);
7540 return;
7542 else
7543 putc ('l', file);
7544 return;
7546 case 12:
7547 case 16:
7548 putc ('t', file);
7549 return;
7551 case 8:
7552 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7554 #ifdef GAS_MNEMONICS
7555 putc ('q', file);
7556 #else
7557 putc ('l', file);
7558 putc ('l', file);
7559 #endif
7561 else
7562 putc ('l', file);
7563 return;
7565 default:
7566 gcc_unreachable ();
7569 case 'b':
7570 case 'w':
7571 case 'k':
7572 case 'q':
7573 case 'h':
7574 case 'y':
7575 case 'X':
7576 case 'P':
7577 break;
7579 case 's':
7580 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7582 PRINT_OPERAND (file, x, 0);
7583 putc (',', file);
7585 return;
7587 case 'D':
7588 /* Little bit of braindamage here. The SSE compare instructions
7589 use completely different names for the comparisons than the
7590 fp conditional moves do. */
7591 switch (GET_CODE (x))
7593 case EQ:
7594 case UNEQ:
7595 fputs ("eq", file);
7596 break;
7597 case LT:
7598 case UNLT:
7599 fputs ("lt", file);
7600 break;
7601 case LE:
7602 case UNLE:
7603 fputs ("le", file);
7604 break;
7605 case UNORDERED:
7606 fputs ("unord", file);
7607 break;
7608 case NE:
7609 case LTGT:
7610 fputs ("neq", file);
7611 break;
7612 case UNGE:
7613 case GE:
7614 fputs ("nlt", file);
7615 break;
7616 case UNGT:
7617 case GT:
7618 fputs ("nle", file);
7619 break;
7620 case ORDERED:
7621 fputs ("ord", file);
7622 break;
7623 default:
7624 gcc_unreachable ();
7626 return;
7627 case 'O':
7628 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7629 if (ASSEMBLER_DIALECT == ASM_ATT)
7631 switch (GET_MODE (x))
7633 case HImode: putc ('w', file); break;
7634 case SImode:
7635 case SFmode: putc ('l', file); break;
7636 case DImode:
7637 case DFmode: putc ('q', file); break;
7638 default: gcc_unreachable ();
7640 putc ('.', file);
7642 #endif
7643 return;
7644 case 'C':
7645 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7646 return;
7647 case 'F':
7648 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7649 if (ASSEMBLER_DIALECT == ASM_ATT)
7650 putc ('.', file);
7651 #endif
7652 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7653 return;
7655 /* Like above, but reverse condition */
7656 case 'c':
7657 /* Check to see if argument to %c is really a constant
7658 and not a condition code which needs to be reversed. */
7659 if (!COMPARISON_P (x))
7661 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7662 return;
7664 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7665 return;
7666 case 'f':
7667 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7668 if (ASSEMBLER_DIALECT == ASM_ATT)
7669 putc ('.', file);
7670 #endif
7671 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7672 return;
7674 case 'H':
7675 /* It doesn't actually matter what mode we use here, as we're
7676 only going to use this for printing. */
7677 x = adjust_address_nv (x, DImode, 8);
7678 break;
7680 case '+':
7682 rtx x;
7684 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7685 return;
7687 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7688 if (x)
7690 int pred_val = INTVAL (XEXP (x, 0));
7692 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7693 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7695 int taken = pred_val > REG_BR_PROB_BASE / 2;
7696 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7698 /* Emit hints only in the case where the default branch prediction
7699 heuristics would fail. */
7700 if (taken != cputaken)
7702 /* We use 3e (DS) prefix for taken branches and
7703 2e (CS) prefix for not taken branches. */
7704 if (taken)
7705 fputs ("ds ; ", file);
7706 else
7707 fputs ("cs ; ", file);
7711 return;
7713 default:
7714 output_operand_lossage ("invalid operand code '%c'", code);
7718 if (GET_CODE (x) == REG)
7719 print_reg (x, code, file);
7721 else if (GET_CODE (x) == MEM)
7723 /* No `byte ptr' prefix for call instructions. */
7724 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7726 const char * size;
7727 switch (GET_MODE_SIZE (GET_MODE (x)))
7729 case 1: size = "BYTE"; break;
7730 case 2: size = "WORD"; break;
7731 case 4: size = "DWORD"; break;
7732 case 8: size = "QWORD"; break;
7733 case 12: size = "XWORD"; break;
7734 case 16: size = "XMMWORD"; break;
7735 default:
7736 gcc_unreachable ();
7739 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7740 if (code == 'b')
7741 size = "BYTE";
7742 else if (code == 'w')
7743 size = "WORD";
7744 else if (code == 'k')
7745 size = "DWORD";
7747 fputs (size, file);
7748 fputs (" PTR ", file);
7751 x = XEXP (x, 0);
7752 /* Avoid (%rip) for call operands. */
7753 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7754 && GET_CODE (x) != CONST_INT)
7755 output_addr_const (file, x);
7756 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7757 output_operand_lossage ("invalid constraints for operand");
7758 else
7759 output_address (x);
7762 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7764 REAL_VALUE_TYPE r;
7765 long l;
7767 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7768 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7770 if (ASSEMBLER_DIALECT == ASM_ATT)
7771 putc ('$', file);
7772 fprintf (file, "0x%08lx", l);
7775 /* These float cases don't actually occur as immediate operands. */
7776 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7778 char dstr[30];
7780 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7781 fprintf (file, "%s", dstr);
7784 else if (GET_CODE (x) == CONST_DOUBLE
7785 && GET_MODE (x) == XFmode)
7787 char dstr[30];
7789 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7790 fprintf (file, "%s", dstr);
7793 else
7795 /* We have patterns that allow zero sets of memory, for instance.
7796 In 64-bit mode, we should probably support all 8-byte vectors,
7797 since we can in fact encode that into an immediate. */
7798 if (GET_CODE (x) == CONST_VECTOR)
7800 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7801 x = const0_rtx;
7804 if (code != 'P')
7806 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7808 if (ASSEMBLER_DIALECT == ASM_ATT)
7809 putc ('$', file);
7811 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7812 || GET_CODE (x) == LABEL_REF)
7814 if (ASSEMBLER_DIALECT == ASM_ATT)
7815 putc ('$', file);
7816 else
7817 fputs ("OFFSET FLAT:", file);
7820 if (GET_CODE (x) == CONST_INT)
7821 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7822 else if (flag_pic)
7823 output_pic_addr_const (file, x, code);
7824 else
7825 output_addr_const (file, x);
7829 /* Print a memory operand whose address is ADDR. */
7831 void
7832 print_operand_address (FILE *file, rtx addr)
7834 struct ix86_address parts;
7835 rtx base, index, disp;
7836 int scale;
7837 int ok = ix86_decompose_address (addr, &parts);
7839 gcc_assert (ok);
7841 base = parts.base;
7842 index = parts.index;
7843 disp = parts.disp;
7844 scale = parts.scale;
7846 switch (parts.seg)
7848 case SEG_DEFAULT:
7849 break;
7850 case SEG_FS:
7851 case SEG_GS:
7852 if (USER_LABEL_PREFIX[0] == 0)
7853 putc ('%', file);
7854 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7855 break;
7856 default:
7857 gcc_unreachable ();
7860 if (!base && !index)
7862 /* A displacement-only address requires special attention. */
7864 if (GET_CODE (disp) == CONST_INT)
7866 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7868 if (USER_LABEL_PREFIX[0] == 0)
7869 putc ('%', file);
7870 fputs ("ds:", file);
7872 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7874 else if (flag_pic)
7875 output_pic_addr_const (file, disp, 0);
7876 else
7877 output_addr_const (file, disp);
7879 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
7880 if (TARGET_64BIT)
7882 if (GET_CODE (disp) == CONST
7883 && GET_CODE (XEXP (disp, 0)) == PLUS
7884 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7885 disp = XEXP (XEXP (disp, 0), 0);
7886 if (GET_CODE (disp) == LABEL_REF
7887 || (GET_CODE (disp) == SYMBOL_REF
7888 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7889 fputs ("(%rip)", file);
7892 else
7894 if (ASSEMBLER_DIALECT == ASM_ATT)
7896 if (disp)
7898 if (flag_pic)
7899 output_pic_addr_const (file, disp, 0);
7900 else if (GET_CODE (disp) == LABEL_REF)
7901 output_asm_label (disp);
7902 else
7903 output_addr_const (file, disp);
7906 putc ('(', file);
7907 if (base)
7908 print_reg (base, 0, file);
7909 if (index)
7911 putc (',', file);
7912 print_reg (index, 0, file);
7913 if (scale != 1)
7914 fprintf (file, ",%d", scale);
7916 putc (')', file);
7918 else
7920 rtx offset = NULL_RTX;
7922 if (disp)
7924 /* Pull out the offset of a symbol; print any symbol itself. */
7925 if (GET_CODE (disp) == CONST
7926 && GET_CODE (XEXP (disp, 0)) == PLUS
7927 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7929 offset = XEXP (XEXP (disp, 0), 1);
7930 disp = gen_rtx_CONST (VOIDmode,
7931 XEXP (XEXP (disp, 0), 0));
7934 if (flag_pic)
7935 output_pic_addr_const (file, disp, 0);
7936 else if (GET_CODE (disp) == LABEL_REF)
7937 output_asm_label (disp);
7938 else if (GET_CODE (disp) == CONST_INT)
7939 offset = disp;
7940 else
7941 output_addr_const (file, disp);
7944 putc ('[', file);
7945 if (base)
7947 print_reg (base, 0, file);
7948 if (offset)
7950 if (INTVAL (offset) >= 0)
7951 putc ('+', file);
7952 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7955 else if (offset)
7956 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7957 else
7958 putc ('0', file);
7960 if (index)
7962 putc ('+', file);
7963 print_reg (index, 0, file);
7964 if (scale != 1)
7965 fprintf (file, "*%d", scale);
7967 putc (']', file);
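/* Example (illustrative addition): the address base=%ebx, index=%esi,
   scale=4, disp=8 is printed as "8(%ebx,%esi,4)" in AT&T syntax and as
   "[ebx+8+esi*4]" in Intel syntax (the registers keep their '%' prefix in
   Intel syntax too when USER_LABEL_PREFIX is empty).  */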
7972 bool
7973 output_addr_const_extra (FILE *file, rtx x)
7975 rtx op;
7977 if (GET_CODE (x) != UNSPEC)
7978 return false;
7980 op = XVECEXP (x, 0, 0);
7981 switch (XINT (x, 1))
7983 case UNSPEC_GOTTPOFF:
7984 output_addr_const (file, op);
7985 /* FIXME: This might be @TPOFF in Sun ld. */
7986 fputs ("@GOTTPOFF", file);
7987 break;
7988 case UNSPEC_TPOFF:
7989 output_addr_const (file, op);
7990 fputs ("@TPOFF", file);
7991 break;
7992 case UNSPEC_NTPOFF:
7993 output_addr_const (file, op);
7994 if (TARGET_64BIT)
7995 fputs ("@TPOFF", file);
7996 else
7997 fputs ("@NTPOFF", file);
7998 break;
7999 case UNSPEC_DTPOFF:
8000 output_addr_const (file, op);
8001 fputs ("@DTPOFF", file);
8002 break;
8003 case UNSPEC_GOTNTPOFF:
8004 output_addr_const (file, op);
8005 if (TARGET_64BIT)
8006 fputs ("@GOTTPOFF(%rip)", file);
8007 else
8008 fputs ("@GOTNTPOFF", file);
8009 break;
8010 case UNSPEC_INDNTPOFF:
8011 output_addr_const (file, op);
8012 fputs ("@INDNTPOFF", file);
8013 break;
8015 default:
8016 return false;
8019 return true;
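/* Illustration (added): the UNSPEC_NTPOFF case above prints "x@NTPOFF" in
   32-bit code and "x@TPOFF" in 64-bit code, so a typical 32-bit local-exec
   TLS access ends up looking like "movl %gs:x@NTPOFF, %eax".  */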
8022 /* Split one or more DImode RTL references into pairs of SImode
8023 references. The RTL can be REG, offsettable MEM, integer constant, or
8024 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8025 split and "num" is its length. lo_half and hi_half are output arrays
8026 that parallel "operands". */
8028 void
8029 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8031 while (num--)
8033 rtx op = operands[num];
8035 /* simplify_subreg refuses to split volatile memory addresses,
8036 but we still have to handle them. */
8037 if (GET_CODE (op) == MEM)
8039 lo_half[num] = adjust_address (op, SImode, 0);
8040 hi_half[num] = adjust_address (op, SImode, 4);
8042 else
8044 lo_half[num] = simplify_gen_subreg (SImode, op,
8045 GET_MODE (op) == VOIDmode
8046 ? DImode : GET_MODE (op), 0);
8047 hi_half[num] = simplify_gen_subreg (SImode, op,
8048 GET_MODE (op) == VOIDmode
8049 ? DImode : GET_MODE (op), 4);
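/* Usage sketch (an illustrative addition, assuming a 64-bit host so the
   constant fits in a HOST_WIDE_INT):
     rtx ops[1] = { GEN_INT (0x100000002) };
     rtx lo[1], hi[1];
     split_di (ops, 1, lo, hi);
   afterwards lo[0] is (const_int 2) and hi[0] is (const_int 1), the low and
   high SImode words of the DImode value.  */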
8053 /* Split one or more TImode RTL references into pairs of DImode
8054 references. The RTL can be REG, offsettable MEM, integer constant, or
8055 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8056 split and "num" is its length. lo_half and hi_half are output arrays
8057 that parallel "operands". */
8059 void
8060 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8062 while (num--)
8064 rtx op = operands[num];
8066 /* simplify_subreg refuses to split volatile memory addresses, but we
8067 still have to handle them. */
8068 if (GET_CODE (op) == MEM)
8070 lo_half[num] = adjust_address (op, DImode, 0);
8071 hi_half[num] = adjust_address (op, DImode, 8);
8073 else
8075 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8076 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8081 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8082 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8083 is the expression of the binary operation. The output may either be
8084 emitted here, or returned to the caller, like all output_* functions.
8086 There is no guarantee that the operands are the same mode, as they
8087 might be within FLOAT or FLOAT_EXTEND expressions. */
8089 #ifndef SYSV386_COMPAT
8090 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8091 wants to fix the assemblers because that causes incompatibility
8092 with gcc. No-one wants to fix gcc because that causes
8093 incompatibility with assemblers... You can use the option of
8094 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8095 #define SYSV386_COMPAT 1
8096 #endif
8098 const char *
8099 output_387_binary_op (rtx insn, rtx *operands)
8101 static char buf[30];
8102 const char *p;
8103 const char *ssep;
8104 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8106 #ifdef ENABLE_CHECKING
8107 /* Even if we do not want to check the inputs, this documents input
8108 constraints, which helps in understanding the following code. */
8109 if (STACK_REG_P (operands[0])
8110 && ((REG_P (operands[1])
8111 && REGNO (operands[0]) == REGNO (operands[1])
8112 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8113 || (REG_P (operands[2])
8114 && REGNO (operands[0]) == REGNO (operands[2])
8115 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8116 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8117 ; /* ok */
8118 else
8119 gcc_assert (is_sse);
8120 #endif
8122 switch (GET_CODE (operands[3]))
8124 case PLUS:
8125 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8126 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8127 p = "fiadd";
8128 else
8129 p = "fadd";
8130 ssep = "add";
8131 break;
8133 case MINUS:
8134 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8135 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8136 p = "fisub";
8137 else
8138 p = "fsub";
8139 ssep = "sub";
8140 break;
8142 case MULT:
8143 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8144 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8145 p = "fimul";
8146 else
8147 p = "fmul";
8148 ssep = "mul";
8149 break;
8151 case DIV:
8152 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8153 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8154 p = "fidiv";
8155 else
8156 p = "fdiv";
8157 ssep = "div";
8158 break;
8160 default:
8161 gcc_unreachable ();
8164 if (is_sse)
8166 strcpy (buf, ssep);
8167 if (GET_MODE (operands[0]) == SFmode)
8168 strcat (buf, "ss\t{%2, %0|%0, %2}");
8169 else
8170 strcat (buf, "sd\t{%2, %0|%0, %2}");
8171 return buf;
8173 strcpy (buf, p);
8175 switch (GET_CODE (operands[3]))
8177 case MULT:
8178 case PLUS:
8179 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8181 rtx temp = operands[2];
8182 operands[2] = operands[1];
8183 operands[1] = temp;
8186 /* We know operands[0] == operands[1]. */
8188 if (GET_CODE (operands[2]) == MEM)
8190 p = "%z2\t%2";
8191 break;
8194 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8196 if (STACK_TOP_P (operands[0]))
8197 /* How is it that we are storing to a dead operand[2]?
8198 Well, presumably operands[1] is dead too. We can't
8199 store the result to st(0) as st(0) gets popped on this
8200 instruction. Instead store to operands[2] (which I
8201 think has to be st(1)). st(1) will be popped later.
8202 gcc <= 2.8.1 didn't have this check and generated
8203 assembly code that the Unixware assembler rejected. */
8204 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8205 else
8206 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8207 break;
8210 if (STACK_TOP_P (operands[0]))
8211 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8212 else
8213 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8214 break;
8216 case MINUS:
8217 case DIV:
8218 if (GET_CODE (operands[1]) == MEM)
8220 p = "r%z1\t%1";
8221 break;
8224 if (GET_CODE (operands[2]) == MEM)
8226 p = "%z2\t%2";
8227 break;
8230 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8232 #if SYSV386_COMPAT
8233 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8234 derived assemblers, confusingly reverse the direction of
8235 the operation for fsub{r} and fdiv{r} when the
8236 destination register is not st(0). The Intel assembler
8237 doesn't have this brain damage. Read !SYSV386_COMPAT to
8238 figure out what the hardware really does. */
8239 if (STACK_TOP_P (operands[0]))
8240 p = "{p\t%0, %2|rp\t%2, %0}";
8241 else
8242 p = "{rp\t%2, %0|p\t%0, %2}";
8243 #else
8244 if (STACK_TOP_P (operands[0]))
8245 /* As above for fmul/fadd, we can't store to st(0). */
8246 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8247 else
8248 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8249 #endif
8250 break;
8253 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8255 #if SYSV386_COMPAT
8256 if (STACK_TOP_P (operands[0]))
8257 p = "{rp\t%0, %1|p\t%1, %0}";
8258 else
8259 p = "{p\t%1, %0|rp\t%0, %1}";
8260 #else
8261 if (STACK_TOP_P (operands[0]))
8262 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8263 else
8264 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8265 #endif
8266 break;
8269 if (STACK_TOP_P (operands[0]))
8271 if (STACK_TOP_P (operands[1]))
8272 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8273 else
8274 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8275 break;
8277 else if (STACK_TOP_P (operands[1]))
8279 #if SYSV386_COMPAT
8280 p = "{\t%1, %0|r\t%0, %1}";
8281 #else
8282 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8283 #endif
8285 else
8287 #if SYSV386_COMPAT
8288 p = "{r\t%2, %0|\t%0, %2}";
8289 #else
8290 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8291 #endif
8293 break;
8295 default:
8296 gcc_unreachable ();
8299 strcat (buf, p);
8300 return buf;
8303 /* Return needed mode for entity in optimize_mode_switching pass. */
8306 ix86_mode_needed (int entity, rtx insn)
8308 enum attr_i387_cw mode;
8310 /* The mode UNINITIALIZED is used to store the control word after a
8311 function call or ASM pattern. The mode ANY specifies that the function
8312 has no requirements on the control word and makes no changes to the
8313 bits we are interested in. */
8315 if (CALL_P (insn)
8316 || (NONJUMP_INSN_P (insn)
8317 && (asm_noperands (PATTERN (insn)) >= 0
8318 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8319 return I387_CW_UNINITIALIZED;
8321 if (recog_memoized (insn) < 0)
8322 return I387_CW_ANY;
8324 mode = get_attr_i387_cw (insn);
8326 switch (entity)
8328 case I387_TRUNC:
8329 if (mode == I387_CW_TRUNC)
8330 return mode;
8331 break;
8333 case I387_FLOOR:
8334 if (mode == I387_CW_FLOOR)
8335 return mode;
8336 break;
8338 case I387_CEIL:
8339 if (mode == I387_CW_CEIL)
8340 return mode;
8341 break;
8343 case I387_MASK_PM:
8344 if (mode == I387_CW_MASK_PM)
8345 return mode;
8346 break;
8348 default:
8349 gcc_unreachable ();
8352 return I387_CW_ANY;
8355 /* Output code to initialize the control word copies used by the trunc?f?i
8356 and rounding patterns. CURRENT_MODE is set to the current control word,
8357 while NEW_MODE is set to the new control word. */
8359 void
8360 emit_i387_cw_initialization (int mode)
8362 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8363 rtx new_mode;
8365 int slot;
8367 rtx reg = gen_reg_rtx (HImode);
8369 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8370 emit_move_insn (reg, stored_mode);
8372 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8374 switch (mode)
8376 case I387_CW_TRUNC:
8377 /* round toward zero (truncate) */
8378 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8379 slot = SLOT_CW_TRUNC;
8380 break;
8382 case I387_CW_FLOOR:
8383 /* round down toward -oo */
8384 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8385 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8386 slot = SLOT_CW_FLOOR;
8387 break;
8389 case I387_CW_CEIL:
8390 /* round up toward +oo */
8391 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8392 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8393 slot = SLOT_CW_CEIL;
8394 break;
8396 case I387_CW_MASK_PM:
8397 /* mask precision exception for nearbyint() */
8398 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8399 slot = SLOT_CW_MASK_PM;
8400 break;
8402 default:
8403 gcc_unreachable ();
8406 else
8408 switch (mode)
8410 case I387_CW_TRUNC:
8411 /* round toward zero (truncate) */
8412 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8413 slot = SLOT_CW_TRUNC;
8414 break;
8416 case I387_CW_FLOOR:
8417 /* round down toward -oo */
8418 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8419 slot = SLOT_CW_FLOOR;
8420 break;
8422 case I387_CW_CEIL:
8423 /* round up toward +oo */
8424 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8425 slot = SLOT_CW_CEIL;
8426 break;
8428 case I387_CW_MASK_PM:
8429 /* mask precision exception for nearbyint() */
8430 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8431 slot = SLOT_CW_MASK_PM;
8432 break;
8434 default:
8435 gcc_unreachable ();
8439 gcc_assert (slot < MAX_386_STACK_LOCALS);
8441 new_mode = assign_386_stack_local (HImode, slot);
8442 emit_move_insn (new_mode, reg);
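/* Background (added): in the 387 control word, bits 10-11 select the rounding
   mode (00 nearest, 01 down, 10 up, 11 truncate) and bit 5 masks the
   precision exception, which is why the code above ORs in 0x0c00 for
   truncation, 0x0400 for floor, 0x0800 for ceil and 0x0020 for nearbyint.  */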
8445 /* Output code for INSN to convert a float to a signed int. OPERANDS
8446 are the insn operands. The output may be [HSD]Imode and the input
8447 operand may be [SDX]Fmode. */
8449 const char *
8450 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8452 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8453 int dimode_p = GET_MODE (operands[0]) == DImode;
8454 int round_mode = get_attr_i387_cw (insn);
8456 /* Jump through a hoop or two for DImode, since the hardware has no
8457 non-popping instruction. We used to do this a different way, but
8458 that was somewhat fragile and broke with post-reload splitters. */
8459 if ((dimode_p || fisttp) && !stack_top_dies)
8460 output_asm_insn ("fld\t%y1", operands);
8462 gcc_assert (STACK_TOP_P (operands[1]));
8463 gcc_assert (GET_CODE (operands[0]) == MEM);
8465 if (fisttp)
8466 output_asm_insn ("fisttp%z0\t%0", operands);
8467 else
8469 if (round_mode != I387_CW_ANY)
8470 output_asm_insn ("fldcw\t%3", operands);
8471 if (stack_top_dies || dimode_p)
8472 output_asm_insn ("fistp%z0\t%0", operands);
8473 else
8474 output_asm_insn ("fist%z0\t%0", operands);
8475 if (round_mode != I387_CW_ANY)
8476 output_asm_insn ("fldcw\t%2", operands);
8479 return "";
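/* Illustrative output (added): an SImode store with a non-default rounding
   mode comes out roughly as
     fldcw %3     (load the special control word)
     fistpl %0
     fldcw %2     (restore the stored control word)
   and a leading "fld %y1" is emitted first whenever the value must survive
   the popping store, i.e. for DImode or fisttp when the stack top is not
   dead.  */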
8482 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8483 should be used. UNORDERED_P is true when fucom should be used. */
8485 const char *
8486 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8488 int stack_top_dies;
8489 rtx cmp_op0, cmp_op1;
8490 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8492 if (eflags_p)
8494 cmp_op0 = operands[0];
8495 cmp_op1 = operands[1];
8497 else
8499 cmp_op0 = operands[1];
8500 cmp_op1 = operands[2];
8503 if (is_sse)
8505 if (GET_MODE (operands[0]) == SFmode)
8506 if (unordered_p)
8507 return "ucomiss\t{%1, %0|%0, %1}";
8508 else
8509 return "comiss\t{%1, %0|%0, %1}";
8510 else
8511 if (unordered_p)
8512 return "ucomisd\t{%1, %0|%0, %1}";
8513 else
8514 return "comisd\t{%1, %0|%0, %1}";
8517 gcc_assert (STACK_TOP_P (cmp_op0));
8519 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8521 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8523 if (stack_top_dies)
8525 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8526 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8528 else
8529 return "ftst\n\tfnstsw\t%0";
8532 if (STACK_REG_P (cmp_op1)
8533 && stack_top_dies
8534 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8535 && REGNO (cmp_op1) != FIRST_STACK_REG)
8537 /* If the top of the 387 stack dies, and the other operand
8538 is also a stack register that dies, then this must be a
8539 `fcompp' float compare. */
8541 if (eflags_p)
8543 /* There is no double-popping fcomi variant. Fortunately,
8544 eflags is immune to the fstp's cc clobbering. */
8545 if (unordered_p)
8546 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8547 else
8548 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8549 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8551 else
8553 if (unordered_p)
8554 return "fucompp\n\tfnstsw\t%0";
8555 else
8556 return "fcompp\n\tfnstsw\t%0";
8559 else
8561 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8563 static const char * const alt[16] =
8565 "fcom%z2\t%y2\n\tfnstsw\t%0",
8566 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8567 "fucom%z2\t%y2\n\tfnstsw\t%0",
8568 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8570 "ficom%z2\t%y2\n\tfnstsw\t%0",
8571 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8572 NULL,
8573 NULL,
8575 "fcomi\t{%y1, %0|%0, %y1}",
8576 "fcomip\t{%y1, %0|%0, %y1}",
8577 "fucomi\t{%y1, %0|%0, %y1}",
8578 "fucomip\t{%y1, %0|%0, %y1}",
8580 NULL,
8581 NULL,
8582 NULL,
8583 NULL
8586 int mask;
8587 const char *ret;
8589 mask = eflags_p << 3;
8590 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8591 mask |= unordered_p << 1;
8592 mask |= stack_top_dies;
8594 gcc_assert (mask < 16);
8595 ret = alt[mask];
8596 gcc_assert (ret);
8598 return ret;
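/* Example (added): an ordered fcomi-style compare (eflags_p set, fp operand,
   not unordered) where the stack top dies gives mask = 8 + 1 = 9 and selects
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */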
8602 void
8603 ix86_output_addr_vec_elt (FILE *file, int value)
8605 const char *directive = ASM_LONG;
8607 #ifdef ASM_QUAD
8608 if (TARGET_64BIT)
8609 directive = ASM_QUAD;
8610 #else
8611 gcc_assert (!TARGET_64BIT);
8612 #endif
8614 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8617 void
8618 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8620 if (TARGET_64BIT)
8621 fprintf (file, "%s%s%d-%s%d\n",
8622 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8623 else if (HAVE_AS_GOTOFF_IN_DATA)
8624 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8625 #if TARGET_MACHO
8626 else if (TARGET_MACHO)
8628 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8629 machopic_output_function_base_name (file);
8630 fprintf(file, "\n");
8632 #endif
8633 else
8634 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8635 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8638 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8639 for the target. */
8641 void
8642 ix86_expand_clear (rtx dest)
8644 rtx tmp;
8646 /* We play register width games, which are only valid after reload. */
8647 gcc_assert (reload_completed);
8649 /* Avoid HImode and its attendant prefix byte. */
8650 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8651 dest = gen_rtx_REG (SImode, REGNO (dest));
8653 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8655 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8656 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8658 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8659 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8662 emit_insn (tmp);
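/* Note (added for illustration): a QImode or HImode destination is widened to
   SImode first, so clearing %al still emits the un-prefixed
   "xorl %eax, %eax"; the PARALLEL with the flags clobber matches the
   movsi_xor / movdi_xor_rex64 patterns, while a plain "mov $0, reg" is kept
   when TARGET_USE_MOV0 is set and we are not optimizing for size.  */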
8665 /* X is an unchanging MEM. If it is a constant pool reference, return
8666 the constant pool rtx, else NULL. */
8669 maybe_get_pool_constant (rtx x)
8671 x = ix86_delegitimize_address (XEXP (x, 0));
8673 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8674 return get_pool_constant (x);
8676 return NULL_RTX;
8679 void
8680 ix86_expand_move (enum machine_mode mode, rtx operands[])
8682 int strict = (reload_in_progress || reload_completed);
8683 rtx op0, op1;
8684 enum tls_model model;
8686 op0 = operands[0];
8687 op1 = operands[1];
8689 if (GET_CODE (op1) == SYMBOL_REF)
8691 model = SYMBOL_REF_TLS_MODEL (op1);
8692 if (model)
8694 op1 = legitimize_tls_address (op1, model, true);
8695 op1 = force_operand (op1, op0);
8696 if (op1 == op0)
8697 return;
8700 else if (GET_CODE (op1) == CONST
8701 && GET_CODE (XEXP (op1, 0)) == PLUS
8702 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8704 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8705 if (model)
8707 rtx addend = XEXP (XEXP (op1, 0), 1);
8708 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8709 op1 = force_operand (op1, NULL);
8710 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8711 op0, 1, OPTAB_DIRECT);
8712 if (op1 == op0)
8713 return;
8717 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8719 #if TARGET_MACHO
8720 if (MACHOPIC_PURE)
8722 rtx temp = ((reload_in_progress
8723 || ((op0 && GET_CODE (op0) == REG)
8724 && mode == Pmode))
8725 ? op0 : gen_reg_rtx (Pmode));
8726 op1 = machopic_indirect_data_reference (op1, temp);
8727 op1 = machopic_legitimize_pic_address (op1, mode,
8728 temp == op1 ? 0 : temp);
8730 else if (MACHOPIC_INDIRECT)
8731 op1 = machopic_indirect_data_reference (op1, 0);
8732 if (op0 == op1)
8733 return;
8734 #else
8735 if (GET_CODE (op0) == MEM)
8736 op1 = force_reg (Pmode, op1);
8737 else
8738 op1 = legitimize_address (op1, op1, Pmode);
8739 #endif /* TARGET_MACHO */
8741 else
8743 if (GET_CODE (op0) == MEM
8744 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8745 || !push_operand (op0, mode))
8746 && GET_CODE (op1) == MEM)
8747 op1 = force_reg (mode, op1);
8749 if (push_operand (op0, mode)
8750 && ! general_no_elim_operand (op1, mode))
8751 op1 = copy_to_mode_reg (mode, op1);
8753 /* Force large constants in 64-bit compilation into a register
8754 to get them CSEed. */
8755 if (TARGET_64BIT && mode == DImode
8756 && immediate_operand (op1, mode)
8757 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8758 && !register_operand (op0, mode)
8759 && optimize && !reload_completed && !reload_in_progress)
8760 op1 = copy_to_mode_reg (mode, op1);
8762 if (FLOAT_MODE_P (mode))
8764 /* If we are loading a floating point constant into a register,
8765 force the value to memory now, since we'll get better code
8766 out of the back end. */
8768 if (strict)
8770 else if (GET_CODE (op1) == CONST_DOUBLE)
8772 op1 = validize_mem (force_const_mem (mode, op1));
8773 if (!register_operand (op0, mode))
8775 rtx temp = gen_reg_rtx (mode);
8776 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8777 emit_move_insn (op0, temp);
8778 return;
8784 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8787 void
8788 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8790 rtx op0 = operands[0], op1 = operands[1];
8792 /* Force constants other than zero into memory. We do not know how
8793 the instructions used to build constants modify the upper 64 bits
8794 of the register; once we have that information we may be able
8795 to handle some of them more efficiently. */
8796 if ((reload_in_progress | reload_completed) == 0
8797 && register_operand (op0, mode)
8798 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8799 op1 = validize_mem (force_const_mem (mode, op1));
8801 /* Make operand1 a register if it isn't already. */
8802 if (!no_new_pseudos
8803 && !register_operand (op0, mode)
8804 && !register_operand (op1, mode))
8806 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8807 return;
8810 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8813 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8814 straight to ix86_expand_vector_move. */
8816 void
8817 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8819 rtx op0, op1, m;
8821 op0 = operands[0];
8822 op1 = operands[1];
8824 if (MEM_P (op1))
8826 /* If we're optimizing for size, movups is the smallest. */
8827 if (optimize_size)
8829 op0 = gen_lowpart (V4SFmode, op0);
8830 op1 = gen_lowpart (V4SFmode, op1);
8831 emit_insn (gen_sse_movups (op0, op1));
8832 return;
8835 /* ??? If we have typed data, then it would appear that using
8836 movdqu is the only way to get unaligned data loaded with
8837 integer type. */
8838 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8840 op0 = gen_lowpart (V16QImode, op0);
8841 op1 = gen_lowpart (V16QImode, op1);
8842 emit_insn (gen_sse2_movdqu (op0, op1));
8843 return;
8846 if (TARGET_SSE2 && mode == V2DFmode)
8848 rtx zero;
8850 /* When SSE registers are split into halves, we can avoid
8851 writing to the top half twice. */
8852 if (TARGET_SSE_SPLIT_REGS)
8854 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8855 zero = op0;
8857 else
8859 /* ??? Not sure about the best option for the Intel chips.
8860 The following would seem to satisfy; the register is
8861 entirely cleared, breaking the dependency chain. We
8862 then store to the upper half, with a dependency depth
8863 of one. A rumor has it that Intel recommends two movsd
8864 followed by an unpacklpd, but this is unconfirmed. And
8865 given that the dependency depth of the unpacklpd would
8866 still be one, I'm not sure why this would be better. */
8867 zero = CONST0_RTX (V2DFmode);
8870 m = adjust_address (op1, DFmode, 0);
8871 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8872 m = adjust_address (op1, DFmode, 8);
8873 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8875 else
8877 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8878 emit_move_insn (op0, CONST0_RTX (mode));
8879 else
8880 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8882 if (mode != V4SFmode)
8883 op0 = gen_lowpart (V4SFmode, op0);
8884 m = adjust_address (op1, V2SFmode, 0);
8885 emit_insn (gen_sse_loadlps (op0, op0, m));
8886 m = adjust_address (op1, V2SFmode, 8);
8887 emit_insn (gen_sse_loadhps (op0, op0, m));
8890 else if (MEM_P (op0))
8892 /* If we're optimizing for size, movups is the smallest. */
8893 if (optimize_size)
8895 op0 = gen_lowpart (V4SFmode, op0);
8896 op1 = gen_lowpart (V4SFmode, op1);
8897 emit_insn (gen_sse_movups (op0, op1));
8898 return;
8901 /* ??? Similar to above, only less clear because of quote
8902 typeless stores unquote. */
8903 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8904 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8906 op0 = gen_lowpart (V16QImode, op0);
8907 op1 = gen_lowpart (V16QImode, op1);
8908 emit_insn (gen_sse2_movdqu (op0, op1));
8909 return;
8912 if (TARGET_SSE2 && mode == V2DFmode)
8914 m = adjust_address (op0, DFmode, 0);
8915 emit_insn (gen_sse2_storelpd (m, op1));
8916 m = adjust_address (op0, DFmode, 8);
8917 emit_insn (gen_sse2_storehpd (m, op1));
8919 else
8921 if (mode != V4SFmode)
8922 op1 = gen_lowpart (V4SFmode, op1);
8923 m = adjust_address (op0, V2SFmode, 0);
8924 emit_insn (gen_sse_storelps (m, op1));
8925 m = adjust_address (op0, V2SFmode, 8);
8926 emit_insn (gen_sse_storehps (m, op1));
8929 else
8930 gcc_unreachable ();
8933 /* Expand a push in MODE. This is some mode for which we do not support
8934 proper push instructions, at least from the registers that we expect
8935 the value to live in. */
8937 void
8938 ix86_expand_push (enum machine_mode mode, rtx x)
8940 rtx tmp;
8942 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8943 GEN_INT (-GET_MODE_SIZE (mode)),
8944 stack_pointer_rtx, 1, OPTAB_DIRECT);
8945 if (tmp != stack_pointer_rtx)
8946 emit_move_insn (stack_pointer_rtx, tmp);
8948 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8949 emit_move_insn (tmp, x);
8952 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8953 destination to use for the operation. If different from the true
8954 destination in operands[0], a copy operation will be required. */
8957 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8958 rtx operands[])
8960 int matching_memory;
8961 rtx src1, src2, dst;
8963 dst = operands[0];
8964 src1 = operands[1];
8965 src2 = operands[2];
8967 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8968 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8969 && (rtx_equal_p (dst, src2)
8970 || immediate_operand (src1, mode)))
8972 rtx temp = src1;
8973 src1 = src2;
8974 src2 = temp;
8977 /* If the destination is memory, and we do not have matching source
8978 operands, do things in registers. */
8979 matching_memory = 0;
8980 if (GET_CODE (dst) == MEM)
8982 if (rtx_equal_p (dst, src1))
8983 matching_memory = 1;
8984 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8985 && rtx_equal_p (dst, src2))
8986 matching_memory = 2;
8987 else
8988 dst = gen_reg_rtx (mode);
8991 /* Both source operands cannot be in memory. */
8992 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8994 if (matching_memory != 2)
8995 src2 = force_reg (mode, src2);
8996 else
8997 src1 = force_reg (mode, src1);
9000 /* If the operation is not commutable, source 1 cannot be a constant
9001 or non-matching memory. */
9002 if ((CONSTANT_P (src1)
9003 || (!matching_memory && GET_CODE (src1) == MEM))
9004 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9005 src1 = force_reg (mode, src1);
9007 src1 = operands[1] = src1;
9008 src2 = operands[2] = src2;
9009 return dst;
9012 /* Similarly, but assume that the destination has already been
9013 set up properly. */
9015 void
9016 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9017 enum machine_mode mode, rtx operands[])
9019 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9020 gcc_assert (dst == operands[0]);
9023 /* Attempt to expand a binary operator. Make the expansion closer to the
9024 actual machine than just general_operand, which will allow 3 separate
9025 memory references (one output, two input) in a single insn. */
9027 void
9028 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9029 rtx operands[])
9031 rtx src1, src2, dst, op, clob;
9033 dst = ix86_fixup_binary_operands (code, mode, operands);
9034 src1 = operands[1];
9035 src2 = operands[2];
9037 /* Emit the instruction. */
9039 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9040 if (reload_in_progress)
9042 /* Reload doesn't know about the flags register, and doesn't know that
9043 it doesn't want to clobber it. We can only do this with PLUS. */
9044 gcc_assert (code == PLUS);
9045 emit_insn (op);
9047 else
9049 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9050 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9053 /* Fix up the destination if needed. */
9054 if (dst != operands[0])
9055 emit_move_insn (operands[0], dst);
9058 /* Return TRUE or FALSE depending on whether the binary operator meets the
9059 appropriate constraints. */
9062 ix86_binary_operator_ok (enum rtx_code code,
9063 enum machine_mode mode ATTRIBUTE_UNUSED,
9064 rtx operands[3])
9066 /* Both source operands cannot be in memory. */
9067 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9068 return 0;
9069 /* If the operation is not commutable, source 1 cannot be a constant. */
9070 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9071 return 0;
9072 /* If the destination is memory, we must have a matching source operand. */
9073 if (GET_CODE (operands[0]) == MEM
9074 && ! (rtx_equal_p (operands[0], operands[1])
9075 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9076 && rtx_equal_p (operands[0], operands[2]))))
9077 return 0;
9078 /* If the operation is not commutable and the source 1 is memory, we must
9079 have a matching destination. */
9080 if (GET_CODE (operands[1]) == MEM
9081 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9082 && ! rtx_equal_p (operands[0], operands[1]))
9083 return 0;
9084 return 1;
9087 /* Attempt to expand a unary operator. Make the expansion closer to the
9088 actual machine than just general_operand, which will allow 2 separate
9089 memory references (one output, one input) in a single insn. */
9091 void
9092 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9093 rtx operands[])
9095 int matching_memory;
9096 rtx src, dst, op, clob;
9098 dst = operands[0];
9099 src = operands[1];
9101 /* If the destination is memory, and we do not have matching source
9102 operands, do things in registers. */
9103 matching_memory = 0;
9104 if (MEM_P (dst))
9106 if (rtx_equal_p (dst, src))
9107 matching_memory = 1;
9108 else
9109 dst = gen_reg_rtx (mode);
9112 /* When source operand is memory, destination must match. */
9113 if (MEM_P (src) && !matching_memory)
9114 src = force_reg (mode, src);
9116 /* Emit the instruction. */
9118 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9119 if (reload_in_progress || code == NOT)
9121 /* Reload doesn't know about the flags register, and doesn't know that
9122 it doesn't want to clobber it. */
9123 gcc_assert (code == NOT);
9124 emit_insn (op);
9126 else
9128 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9129 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9132 /* Fix up the destination if needed. */
9133 if (dst != operands[0])
9134 emit_move_insn (operands[0], dst);
9137 /* Return TRUE or FALSE depending on whether the unary operator meets the
9138 appropriate constraints. */
9141 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9142 enum machine_mode mode ATTRIBUTE_UNUSED,
9143 rtx operands[2] ATTRIBUTE_UNUSED)
9145 /* If one of operands is memory, source and destination must match. */
9146 if ((GET_CODE (operands[0]) == MEM
9147 || GET_CODE (operands[1]) == MEM)
9148 && ! rtx_equal_p (operands[0], operands[1]))
9149 return FALSE;
9150 return TRUE;
9153 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9154 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9155 true, then replicate the mask for all elements of the vector register.
9156 If INVERT is true, then create a mask excluding the sign bit. */
9159 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9161 enum machine_mode vec_mode;
9162 HOST_WIDE_INT hi, lo;
9163 int shift = 63;
9164 rtvec v;
9165 rtx mask;
9167 /* Find the sign bit, sign extended to 2*HWI. */
9168 if (mode == SFmode)
9169 lo = 0x80000000, hi = lo < 0;
9170 else if (HOST_BITS_PER_WIDE_INT >= 64)
9171 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9172 else
9173 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9175 if (invert)
9176 lo = ~lo, hi = ~hi;
9178 /* Force this value into the low part of a fp vector constant. */
9179 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9180 mask = gen_lowpart (mode, mask);
9182 if (mode == SFmode)
9184 if (vect)
9185 v = gen_rtvec (4, mask, mask, mask, mask);
9186 else
9187 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9188 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9189 vec_mode = V4SFmode;
9191 else
9193 if (vect)
9194 v = gen_rtvec (2, mask, mask);
9195 else
9196 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9197 vec_mode = V2DFmode;
9200 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
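/* Example values (added): for DFmode the mask element is 0x8000000000000000
   when INVERT is false and 0x7fffffffffffffff when INVERT is true; with VECT
   set the element is replicated across the whole V2DF (or V4SF) register.
   The ABS expansion below ANDs with the inverted mask, while NEG XORs with
   the sign-bit mask.  */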
9203 /* Generate code for floating point ABS or NEG. */
9205 void
9206 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9207 rtx operands[])
9209 rtx mask, set, use, clob, dst, src;
9210 bool matching_memory;
9211 bool use_sse = false;
9212 bool vector_mode = VECTOR_MODE_P (mode);
9213 enum machine_mode elt_mode = mode;
9215 if (vector_mode)
9217 elt_mode = GET_MODE_INNER (mode);
9218 use_sse = true;
9220 else if (TARGET_SSE_MATH)
9221 use_sse = SSE_FLOAT_MODE_P (mode);
9223 /* NEG and ABS performed with SSE use bitwise mask operations.
9224 Create the appropriate mask now. */
9225 if (use_sse)
9226 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9227 else
9229 /* When not using SSE, we don't use the mask, but prefer to keep the
9230 same general form of the insn pattern to reduce duplication when
9231 it comes time to split. */
9232 mask = const0_rtx;
9235 dst = operands[0];
9236 src = operands[1];
9238 /* If the destination is memory, and we don't have matching source
9239 operands, do things in registers. */
9240 matching_memory = false;
9241 if (MEM_P (dst))
9243 if (rtx_equal_p (dst, src))
9244 matching_memory = true;
9245 else
9246 dst = gen_reg_rtx (mode);
9248 if (MEM_P (src) && !matching_memory)
9249 src = force_reg (mode, src);
9251 if (vector_mode)
9253 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9254 set = gen_rtx_SET (VOIDmode, dst, set);
9255 emit_insn (set);
9257 else
9259 set = gen_rtx_fmt_e (code, mode, src);
9260 set = gen_rtx_SET (VOIDmode, dst, set);
9261 use = gen_rtx_USE (VOIDmode, mask);
9262 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9266 if (dst != operands[0])
9267 emit_move_insn (operands[0], dst);
9270 /* Expand a copysign operation. Special case operand 0 being a constant. */
9272 void
9273 ix86_expand_copysign (rtx operands[])
9275 enum machine_mode mode, vmode;
9276 rtx dest, op0, op1, mask, nmask;
9278 dest = operands[0];
9279 op0 = operands[1];
9280 op1 = operands[2];
9282 mode = GET_MODE (dest);
9283 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9285 if (GET_CODE (op0) == CONST_DOUBLE)
9287 rtvec v;
9289 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9290 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9292 if (op0 == CONST0_RTX (mode))
9293 op0 = CONST0_RTX (vmode);
9294 else
9296 if (mode == SFmode)
9297 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9298 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9299 else
9300 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9301 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9304 mask = ix86_build_signbit_mask (mode, 0, 0);
9306 if (mode == SFmode)
9307 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9308 else
9309 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9311 else
9313 nmask = ix86_build_signbit_mask (mode, 0, 1);
9314 mask = ix86_build_signbit_mask (mode, 0, 0);
9316 if (mode == SFmode)
9317 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9318 else
9319 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
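/* Background (an added note): the split routines below implement the
   bit-level identity copysign (x, y) = (x & ~SIGNMASK) | (y & SIGNMASK);
   MASK holds SIGNMASK and, in the variable case, NMASK holds ~SIGNMASK, both
   produced by ix86_build_signbit_mask above.  */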
9323 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9324 be a constant, and so has already been expanded into a vector constant. */
9326 void
9327 ix86_split_copysign_const (rtx operands[])
9329 enum machine_mode mode, vmode;
9330 rtx dest, op0, op1, mask, x;
9332 dest = operands[0];
9333 op0 = operands[1];
9334 op1 = operands[2];
9335 mask = operands[3];
9337 mode = GET_MODE (dest);
9338 vmode = GET_MODE (mask);
9340 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9341 x = gen_rtx_AND (vmode, dest, mask);
9342 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9344 if (op0 != CONST0_RTX (vmode))
9346 x = gen_rtx_IOR (vmode, dest, op0);
9347 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9351 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9352 so we have to do two masks. */
9354 void
9355 ix86_split_copysign_var (rtx operands[])
9357 enum machine_mode mode, vmode;
9358 rtx dest, scratch, op0, op1, mask, nmask, x;
9360 dest = operands[0];
9361 scratch = operands[1];
9362 op0 = operands[2];
9363 op1 = operands[3];
9364 nmask = operands[4];
9365 mask = operands[5];
9367 mode = GET_MODE (dest);
9368 vmode = GET_MODE (mask);
9370 if (rtx_equal_p (op0, op1))
9372 /* Shouldn't happen often (it's useless, obviously), but when it does
9373 we'd generate incorrect code if we continue below. */
9374 emit_move_insn (dest, op0);
9375 return;
9378 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9380 gcc_assert (REGNO (op1) == REGNO (scratch));
9382 x = gen_rtx_AND (vmode, scratch, mask);
9383 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9385 dest = mask;
9386 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9387 x = gen_rtx_NOT (vmode, dest);
9388 x = gen_rtx_AND (vmode, x, op0);
9389 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9391 else
9393 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9395 x = gen_rtx_AND (vmode, scratch, mask);
9397 else /* alternative 2,4 */
9399 gcc_assert (REGNO (mask) == REGNO (scratch));
9400 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9401 x = gen_rtx_AND (vmode, scratch, op1);
9403 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9405 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9407 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9408 x = gen_rtx_AND (vmode, dest, nmask);
9410 else /* alternative 3,4 */
9412 gcc_assert (REGNO (nmask) == REGNO (dest));
9413 dest = nmask;
9414 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9415 x = gen_rtx_AND (vmode, dest, op0);
9417 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9420 x = gen_rtx_IOR (vmode, dest, scratch);
9421 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9424 /* Return TRUE or FALSE depending on whether the first SET in INSN
9425 has source and destination with matching CC modes, and whether the
9426 CC mode is at least as constrained as REQ_MODE. */
9429 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9431 rtx set;
9432 enum machine_mode set_mode;
9434 set = PATTERN (insn);
9435 if (GET_CODE (set) == PARALLEL)
9436 set = XVECEXP (set, 0, 0);
9437 gcc_assert (GET_CODE (set) == SET);
9438 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9440 set_mode = GET_MODE (SET_DEST (set));
9441 switch (set_mode)
9443 case CCNOmode:
9444 if (req_mode != CCNOmode
9445 && (req_mode != CCmode
9446 || XEXP (SET_SRC (set), 1) != const0_rtx))
9447 return 0;
9448 break;
9449 case CCmode:
9450 if (req_mode == CCGCmode)
9451 return 0;
9452 /* FALLTHRU */
9453 case CCGCmode:
9454 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9455 return 0;
9456 /* FALLTHRU */
9457 case CCGOCmode:
9458 if (req_mode == CCZmode)
9459 return 0;
9460 /* FALLTHRU */
9461 case CCZmode:
9462 break;
9464 default:
9465 gcc_unreachable ();
9468 return (GET_MODE (SET_SRC (set)) == set_mode);
9471 /* Generate insn patterns to do an integer compare of OPERANDS. */
9473 static rtx
9474 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9476 enum machine_mode cmpmode;
9477 rtx tmp, flags;
9479 cmpmode = SELECT_CC_MODE (code, op0, op1);
9480 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9482 /* This is very simple, but making the interface the same as in the
9483 FP case makes the rest of the code easier. */
9484 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9485 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9487 /* Return the test that should be put into the flags user, i.e.
9488 the bcc, scc, or cmov instruction. */
9489 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9492 /* Figure out whether to use ordered or unordered fp comparisons.
9493 Return the appropriate mode to use. */
9495 enum machine_mode
9496 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9498 /* ??? In order to make all comparisons reversible, we do all comparisons
9499 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9500 all forms of trapping and non-trapping comparisons, we can make inequality
9501 comparisons trapping again, since it results in better code when using
9502 FCOM based compares. */
9503 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9506 enum machine_mode
9507 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9509 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9510 return ix86_fp_compare_mode (code);
9511 switch (code)
9513 /* Only zero flag is needed. */
9514 case EQ: /* ZF=0 */
9515 case NE: /* ZF!=0 */
9516 return CCZmode;
9517 /* Codes needing carry flag. */
9518 case GEU: /* CF=0 */
9519 case GTU: /* CF=0 & ZF=0 */
9520 case LTU: /* CF=1 */
9521 case LEU: /* CF=1 | ZF=1 */
9522 return CCmode;
9523 /* Codes possibly doable only with the sign flag when
9524 comparing against zero. */
9525 case GE: /* SF=OF or SF=0 */
9526 case LT: /* SF<>OF or SF=1 */
9527 if (op1 == const0_rtx)
9528 return CCGOCmode;
9529 else
9530 /* For other cases the carry flag is not required. */
9531 return CCGCmode;
9532 /* Codes doable only with the sign flag when comparing
9533 against zero, but for which we lack a jump instruction,
9534 so we use relational tests against the overflow flag,
9535 which therefore needs to be zero. */
9536 case GT: /* ZF=0 & SF=OF */
9537 case LE: /* ZF=1 | SF<>OF */
9538 if (op1 == const0_rtx)
9539 return CCNOmode;
9540 else
9541 return CCGCmode;
9542 /* The strcmp pattern does (use flags), and combine may ask us for the
9543 proper mode. */
9544 case USE:
9545 return CCmode;
9546 default:
9547 gcc_unreachable ();
9551 /* Return the fixed registers used for condition codes. */
9553 static bool
9554 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9556 *p1 = FLAGS_REG;
9557 *p2 = FPSR_REG;
9558 return true;
9561 /* If two condition code modes are compatible, return a condition code
9562 mode which is compatible with both. Otherwise, return
9563 VOIDmode. */
9565 static enum machine_mode
9566 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9568 if (m1 == m2)
9569 return m1;
9571 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9572 return VOIDmode;
9574 if ((m1 == CCGCmode && m2 == CCGOCmode)
9575 || (m1 == CCGOCmode && m2 == CCGCmode))
9576 return CCGCmode;
9578 switch (m1)
9580 default:
9581 gcc_unreachable ();
9583 case CCmode:
9584 case CCGCmode:
9585 case CCGOCmode:
9586 case CCNOmode:
9587 case CCZmode:
9588 switch (m2)
9590 default:
9591 return VOIDmode;
9593 case CCmode:
9594 case CCGCmode:
9595 case CCGOCmode:
9596 case CCNOmode:
9597 case CCZmode:
9598 return CCmode;
9601 case CCFPmode:
9602 case CCFPUmode:
9603 /* These are only compatible with themselves, which we already
9604 checked above. */
9605 return VOIDmode;
9609 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9612 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9614 enum rtx_code swapped_code = swap_condition (code);
9615 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9616 || (ix86_fp_comparison_cost (swapped_code)
9617 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9620 /* Swap, force into registers, or otherwise massage the two operands
9621 to a fp comparison. The operands are updated in place; the new
9622 comparison code is returned. */
9624 static enum rtx_code
9625 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9627 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9628 rtx op0 = *pop0, op1 = *pop1;
9629 enum machine_mode op_mode = GET_MODE (op0);
9630 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9632 /* All of the unordered compare instructions only work on registers.
9633 The same is true of the fcomi compare instructions. The XFmode
9634 compare instructions require registers except when comparing
9635 against zero or when converting operand 1 from fixed point to
9636 floating point. */
9638 if (!is_sse
9639 && (fpcmp_mode == CCFPUmode
9640 || (op_mode == XFmode
9641 && ! (standard_80387_constant_p (op0) == 1
9642 || standard_80387_constant_p (op1) == 1)
9643 && GET_CODE (op1) != FLOAT)
9644 || ix86_use_fcomi_compare (code)))
9646 op0 = force_reg (op_mode, op0);
9647 op1 = force_reg (op_mode, op1);
9649 else
9651 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9652 things around if they appear profitable, otherwise force op0
9653 into a register. */
9655 if (standard_80387_constant_p (op0) == 0
9656 || (GET_CODE (op0) == MEM
9657 && ! (standard_80387_constant_p (op1) == 0
9658 || GET_CODE (op1) == MEM)))
9660 rtx tmp;
9661 tmp = op0, op0 = op1, op1 = tmp;
9662 code = swap_condition (code);
9665 if (GET_CODE (op0) != REG)
9666 op0 = force_reg (op_mode, op0);
9668 if (CONSTANT_P (op1))
9670 int tmp = standard_80387_constant_p (op1);
9671 if (tmp == 0)
9672 op1 = validize_mem (force_const_mem (op_mode, op1));
9673 else if (tmp == 1)
9675 if (TARGET_CMOVE)
9676 op1 = force_reg (op_mode, op1);
9678 else
9679 op1 = force_reg (op_mode, op1);
9683 /* Try to rearrange the comparison to make it cheaper. */
9684 if (ix86_fp_comparison_cost (code)
9685 > ix86_fp_comparison_cost (swap_condition (code))
9686 && (GET_CODE (op1) == REG || !no_new_pseudos))
9688 rtx tmp;
9689 tmp = op0, op0 = op1, op1 = tmp;
9690 code = swap_condition (code);
9691 if (GET_CODE (op0) != REG)
9692 op0 = force_reg (op_mode, op0);
9695 *pop0 = op0;
9696 *pop1 = op1;
9697 return code;
9700 /* Convert comparison codes we use to represent FP comparison to integer
9701 code that will result in proper branch. Return UNKNOWN if no such code
9702 is available. */
9704 enum rtx_code
9705 ix86_fp_compare_code_to_integer (enum rtx_code code)
9707 switch (code)
9709 case GT:
9710 return GTU;
9711 case GE:
9712 return GEU;
9713 case ORDERED:
9714 case UNORDERED:
9715 return code;
9716 break;
9717 case UNEQ:
9718 return EQ;
9719 break;
9720 case UNLT:
9721 return LTU;
9722 break;
9723 case UNLE:
9724 return LEU;
9725 break;
9726 case LTGT:
9727 return NE;
9728 break;
9729 default:
9730 return UNKNOWN;
9734 /* Split comparison code CODE into comparisons we can do using branch
9735 instructions. BYPASS_CODE is the comparison code for a branch that will
9736 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9737 is not required, its value is set to UNKNOWN.
9738 We never require more than two branches. */
9740 void
9741 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9742 enum rtx_code *first_code,
9743 enum rtx_code *second_code)
9745 *first_code = code;
9746 *bypass_code = UNKNOWN;
9747 *second_code = UNKNOWN;
9749 /* The fcomi comparison sets flags as follows:
9751 cmp ZF PF CF
9752 > 0 0 0
9753 < 0 0 1
9754 = 1 0 0
9755 un 1 1 1 */
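   /* For example, a plain LT test would read CF=1 alone, but an unordered
      result also sets CF.  Under IEEE math LT is therefore split below into
      first_code = UNLT with bypass_code = UNORDERED, so the caller first
      emits a PF-based branch that jumps around the CF test whenever the
      operands compare unordered.  When !TARGET_IEEE_FP the extra branches
      are dropped again at the end.  */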
9757 switch (code)
9759 case GT: /* GTU - CF=0 & ZF=0 */
9760 case GE: /* GEU - CF=0 */
9761 case ORDERED: /* PF=0 */
9762 case UNORDERED: /* PF=1 */
9763 case UNEQ: /* EQ - ZF=1 */
9764 case UNLT: /* LTU - CF=1 */
9765 case UNLE: /* LEU - CF=1 | ZF=1 */
9766 case LTGT: /* EQ - ZF=0 */
9767 break;
9768 case LT: /* LTU - CF=1 - fails on unordered */
9769 *first_code = UNLT;
9770 *bypass_code = UNORDERED;
9771 break;
9772 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9773 *first_code = UNLE;
9774 *bypass_code = UNORDERED;
9775 break;
9776 case EQ: /* EQ - ZF=1 - fails on unordered */
9777 *first_code = UNEQ;
9778 *bypass_code = UNORDERED;
9779 break;
9780 case NE: /* NE - ZF=0 - fails on unordered */
9781 *first_code = LTGT;
9782 *second_code = UNORDERED;
9783 break;
9784 case UNGE: /* GEU - CF=0 - fails on unordered */
9785 *first_code = GE;
9786 *second_code = UNORDERED;
9787 break;
9788 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9789 *first_code = GT;
9790 *second_code = UNORDERED;
9791 break;
9792 default:
9793 gcc_unreachable ();
9795 if (!TARGET_IEEE_FP)
9797 *second_code = UNKNOWN;
9798 *bypass_code = UNKNOWN;
9802 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9803 All of the following functions use the number of instructions as the cost metric.
9804 In the future this should be tweaked to compute bytes for optimize_size and to
9805 take into account the performance of various instructions on various CPUs. */
9806 static int
9807 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9809 if (!TARGET_IEEE_FP)
9810 return 4;
9811 /* The cost of code output by ix86_expand_fp_compare. */
9812 switch (code)
9814 case UNLE:
9815 case UNLT:
9816 case LTGT:
9817 case GT:
9818 case GE:
9819 case UNORDERED:
9820 case ORDERED:
9821 case UNEQ:
9822 return 4;
9823 break;
9824 case LT:
9825 case NE:
9826 case EQ:
9827 case UNGE:
9828 return 5;
9829 break;
9830 case LE:
9831 case UNGT:
9832 return 6;
9833 break;
9834 default:
9835 gcc_unreachable ();
9839 /* Return cost of comparison done using fcomi operation.
9840 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9841 static int
9842 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9844 enum rtx_code bypass_code, first_code, second_code;
9845 /* Return an arbitrarily high cost when the instruction is not supported - this
9846 prevents gcc from using it. */
9847 if (!TARGET_CMOVE)
9848 return 1024;
9849 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9850 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9853 /* Return cost of comparison done using sahf operation.
9854 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9855 static int
9856 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9858 enum rtx_code bypass_code, first_code, second_code;
9859 /* Return an arbitrarily high cost when the instruction is not preferred - this
9860 keeps gcc from using it. */
9861 if (!TARGET_USE_SAHF && !optimize_size)
9862 return 1024;
9863 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9864 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9867 /* Compute cost of the comparison done using any method.
9868 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9869 static int
9870 ix86_fp_comparison_cost (enum rtx_code code)
9872 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9873 int min;
9875 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9876 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9878 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9879 if (min > sahf_cost)
9880 min = sahf_cost;
9881 if (min > fcomi_cost)
9882 min = fcomi_cost;
9883 return min;
9886 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9888 static rtx
9889 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9890 rtx *second_test, rtx *bypass_test)
9892 enum machine_mode fpcmp_mode, intcmp_mode;
9893 rtx tmp, tmp2;
9894 int cost = ix86_fp_comparison_cost (code);
9895 enum rtx_code bypass_code, first_code, second_code;
9897 fpcmp_mode = ix86_fp_compare_mode (code);
9898 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9900 if (second_test)
9901 *second_test = NULL_RTX;
9902 if (bypass_test)
9903 *bypass_test = NULL_RTX;
9905 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9907 /* Do fcomi/sahf based test when profitable. */
9908 if ((bypass_code == UNKNOWN || bypass_test)
9909 && (second_code == UNKNOWN || second_test)
9910 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9912 if (TARGET_CMOVE)
9914 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9915 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9916 tmp);
9917 emit_insn (tmp);
9919 else
9921 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9922 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9923 if (!scratch)
9924 scratch = gen_reg_rtx (HImode);
9925 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9926 emit_insn (gen_x86_sahf_1 (scratch));
9929 /* The FP codes work out to act like unsigned. */
9930 intcmp_mode = fpcmp_mode;
9931 code = first_code;
9932 if (bypass_code != UNKNOWN)
9933 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9934 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9935 const0_rtx);
9936 if (second_code != UNKNOWN)
9937 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9938 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9939 const0_rtx);
9941 else
9943 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9944 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9945 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9946 if (!scratch)
9947 scratch = gen_reg_rtx (HImode);
9948 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9950 /* In the unordered case, we have to check C2 for NaN's, which
9951 doesn't happen to work out to anything nice combination-wise.
9952 So do some bit twiddling on the value we've got in AH to come
9953 up with an appropriate set of condition codes. */
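      /* For instance, the UNORDERED case below just tests the C2 bit
	 (mask 0x04) of the status byte now sitting in AH and branches on
	 NE, while ORDERED tests the same bit and branches on EQ; the
	 IEEE-safe orderings need longer mask/compare sequences.  */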
9955 intcmp_mode = CCNOmode;
9956 switch (code)
9958 case GT:
9959 case UNGT:
9960 if (code == GT || !TARGET_IEEE_FP)
9962 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9963 code = EQ;
9965 else
9967 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9968 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9969 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9970 intcmp_mode = CCmode;
9971 code = GEU;
9973 break;
9974 case LT:
9975 case UNLT:
9976 if (code == LT && TARGET_IEEE_FP)
9978 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9979 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9980 intcmp_mode = CCmode;
9981 code = EQ;
9983 else
9985 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9986 code = NE;
9988 break;
9989 case GE:
9990 case UNGE:
9991 if (code == GE || !TARGET_IEEE_FP)
9993 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9994 code = EQ;
9996 else
9998 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9999 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10000 GEN_INT (0x01)));
10001 code = NE;
10003 break;
10004 case LE:
10005 case UNLE:
10006 if (code == LE && TARGET_IEEE_FP)
10008 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10009 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10010 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10011 intcmp_mode = CCmode;
10012 code = LTU;
10014 else
10016 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10017 code = NE;
10019 break;
10020 case EQ:
10021 case UNEQ:
10022 if (code == EQ && TARGET_IEEE_FP)
10024 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10025 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10026 intcmp_mode = CCmode;
10027 code = EQ;
10029 else
10031 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10032 code = NE;
10033 break;
10035 break;
10036 case NE:
10037 case LTGT:
10038 if (code == NE && TARGET_IEEE_FP)
10040 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10041 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10042 GEN_INT (0x40)));
10043 code = NE;
10045 else
10047 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10048 code = EQ;
10050 break;
10052 case UNORDERED:
10053 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10054 code = NE;
10055 break;
10056 case ORDERED:
10057 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10058 code = EQ;
10059 break;
10061 default:
10062 gcc_unreachable ();
10066 /* Return the test that should be put into the flags user, i.e.
10067 the bcc, scc, or cmov instruction. */
10068 return gen_rtx_fmt_ee (code, VOIDmode,
10069 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10070 const0_rtx);
10074 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10076 rtx op0, op1, ret;
10077 op0 = ix86_compare_op0;
10078 op1 = ix86_compare_op1;
10080 if (second_test)
10081 *second_test = NULL_RTX;
10082 if (bypass_test)
10083 *bypass_test = NULL_RTX;
10085 if (ix86_compare_emitted)
10087 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10088 ix86_compare_emitted = NULL_RTX;
10090 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10091 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10092 second_test, bypass_test);
10093 else
10094 ret = ix86_expand_int_compare (code, op0, op1);
10096 return ret;
10099 /* Return true if the CODE will result in a nontrivial jump sequence. */
10100 bool
10101 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10103 enum rtx_code bypass_code, first_code, second_code;
10104 if (!TARGET_CMOVE)
10105 return true;
10106 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10107 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10110 void
10111 ix86_expand_branch (enum rtx_code code, rtx label)
10113 rtx tmp;
10115 switch (GET_MODE (ix86_compare_op0))
10117 case QImode:
10118 case HImode:
10119 case SImode:
10120 simple:
10121 tmp = ix86_expand_compare (code, NULL, NULL);
10122 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10123 gen_rtx_LABEL_REF (VOIDmode, label),
10124 pc_rtx);
10125 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10126 return;
10128 case SFmode:
10129 case DFmode:
10130 case XFmode:
10132 rtvec vec;
10133 int use_fcomi;
10134 enum rtx_code bypass_code, first_code, second_code;
10136 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10137 &ix86_compare_op1);
10139 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10141 /* Check whether we will use the natural sequence with one jump. If
10142 so, we can expand the jump early. Otherwise delay expansion by
10143 creating a compound insn so as not to confuse the optimizers. */
10144 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10145 && TARGET_CMOVE)
10147 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10148 gen_rtx_LABEL_REF (VOIDmode, label),
10149 pc_rtx, NULL_RTX, NULL_RTX);
10151 else
10153 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10154 ix86_compare_op0, ix86_compare_op1);
10155 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10156 gen_rtx_LABEL_REF (VOIDmode, label),
10157 pc_rtx);
10158 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10160 use_fcomi = ix86_use_fcomi_compare (code);
10161 vec = rtvec_alloc (3 + !use_fcomi);
10162 RTVEC_ELT (vec, 0) = tmp;
10163 RTVEC_ELT (vec, 1)
10164 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10165 RTVEC_ELT (vec, 2)
10166 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10167 if (! use_fcomi)
10168 RTVEC_ELT (vec, 3)
10169 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10171 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10173 return;
10176 case DImode:
10177 if (TARGET_64BIT)
10178 goto simple;
10179 case TImode:
10180 /* Expand DImode branch into multiple compare+branch. */
10182 rtx lo[2], hi[2], label2;
10183 enum rtx_code code1, code2, code3;
10184 enum machine_mode submode;
10186 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10188 tmp = ix86_compare_op0;
10189 ix86_compare_op0 = ix86_compare_op1;
10190 ix86_compare_op1 = tmp;
10191 code = swap_condition (code);
10193 if (GET_MODE (ix86_compare_op0) == DImode)
10195 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10196 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10197 submode = SImode;
10199 else
10201 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10202 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10203 submode = DImode;
10206 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10207 avoid two branches. This costs one extra insn, so disable when
10208 optimizing for size. */
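	  /* Roughly, for a DImode "a == b" on ia32 this emits
	       xor	hi(b), hi(a)
	       xor	lo(b), lo(a)
	       or	hi(a), lo(a)
	     followed by a single compare of the result against zero; the
	     xors are dropped entirely when the corresponding half of op1
	     is zero.  */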
10210 if ((code == EQ || code == NE)
10211 && (!optimize_size
10212 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10214 rtx xor0, xor1;
10216 xor1 = hi[0];
10217 if (hi[1] != const0_rtx)
10218 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10219 NULL_RTX, 0, OPTAB_WIDEN);
10221 xor0 = lo[0];
10222 if (lo[1] != const0_rtx)
10223 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10224 NULL_RTX, 0, OPTAB_WIDEN);
10226 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10227 NULL_RTX, 0, OPTAB_WIDEN);
10229 ix86_compare_op0 = tmp;
10230 ix86_compare_op1 = const0_rtx;
10231 ix86_expand_branch (code, label);
10232 return;
10235 /* Otherwise, if we are doing a less-than or greater-than-or-equal
10236 comparison, op1 is a constant, and the low word is zero, then we can
10237 just examine the high word. */
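	  /* E.g. the unsigned DImode test "a < 0x500000000" has a zero low
	     word, so only "hi(a) < 5" needs to be tested; no value of the
	     low word can change the outcome.  */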
10239 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10240 switch (code)
10242 case LT: case LTU: case GE: case GEU:
10243 ix86_compare_op0 = hi[0];
10244 ix86_compare_op1 = hi[1];
10245 ix86_expand_branch (code, label);
10246 return;
10247 default:
10248 break;
10251 /* Otherwise, we need two or three jumps. */
10253 label2 = gen_label_rtx ();
10255 code1 = code;
10256 code2 = swap_condition (code);
10257 code3 = unsigned_condition (code);
10259 switch (code)
10261 case LT: case GT: case LTU: case GTU:
10262 break;
10264 case LE: code1 = LT; code2 = GT; break;
10265 case GE: code1 = GT; code2 = LT; break;
10266 case LEU: code1 = LTU; code2 = GTU; break;
10267 case GEU: code1 = GTU; code2 = LTU; break;
10269 case EQ: code1 = UNKNOWN; code2 = NE; break;
10270 case NE: code2 = UNKNOWN; break;
10272 default:
10273 gcc_unreachable ();
10277 * a < b =>
10278 * if (hi(a) < hi(b)) goto true;
10279 * if (hi(a) > hi(b)) goto false;
10280 * if (lo(a) < lo(b)) goto true;
10281 * false:
10284 ix86_compare_op0 = hi[0];
10285 ix86_compare_op1 = hi[1];
10287 if (code1 != UNKNOWN)
10288 ix86_expand_branch (code1, label);
10289 if (code2 != UNKNOWN)
10290 ix86_expand_branch (code2, label2);
10292 ix86_compare_op0 = lo[0];
10293 ix86_compare_op1 = lo[1];
10294 ix86_expand_branch (code3, label);
10296 if (code2 != UNKNOWN)
10297 emit_label (label2);
10298 return;
10301 default:
10302 gcc_unreachable ();
10306 /* Split branch based on floating point condition. */
10307 void
10308 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10309 rtx target1, rtx target2, rtx tmp, rtx pushed)
10311 rtx second, bypass;
10312 rtx label = NULL_RTX;
10313 rtx condition;
10314 int bypass_probability = -1, second_probability = -1, probability = -1;
10315 rtx i;
10317 if (target2 != pc_rtx)
10319 rtx tmp = target2;
10320 code = reverse_condition_maybe_unordered (code);
10321 target2 = target1;
10322 target1 = tmp;
10325 condition = ix86_expand_fp_compare (code, op1, op2,
10326 tmp, &second, &bypass);
10328 /* Remove pushed operand from stack. */
10329 if (pushed)
10330 ix86_free_from_memory (GET_MODE (pushed));
10332 if (split_branch_probability >= 0)
10334 /* Distribute the probabilities across the jumps.
10335 Assume that BYPASS and SECOND always test
10336 for UNORDERED. */
10337 probability = split_branch_probability;
10339 /* A value of 1 is low enough that the probability does not need
10340 to be updated. Later we may run some experiments and see
10341 if unordered values are more frequent in practice. */
10342 if (bypass)
10343 bypass_probability = 1;
10344 if (second)
10345 second_probability = 1;
10347 if (bypass != NULL_RTX)
10349 label = gen_label_rtx ();
10350 i = emit_jump_insn (gen_rtx_SET
10351 (VOIDmode, pc_rtx,
10352 gen_rtx_IF_THEN_ELSE (VOIDmode,
10353 bypass,
10354 gen_rtx_LABEL_REF (VOIDmode,
10355 label),
10356 pc_rtx)));
10357 if (bypass_probability >= 0)
10358 REG_NOTES (i)
10359 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10360 GEN_INT (bypass_probability),
10361 REG_NOTES (i));
10363 i = emit_jump_insn (gen_rtx_SET
10364 (VOIDmode, pc_rtx,
10365 gen_rtx_IF_THEN_ELSE (VOIDmode,
10366 condition, target1, target2)));
10367 if (probability >= 0)
10368 REG_NOTES (i)
10369 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10370 GEN_INT (probability),
10371 REG_NOTES (i));
10372 if (second != NULL_RTX)
10374 i = emit_jump_insn (gen_rtx_SET
10375 (VOIDmode, pc_rtx,
10376 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10377 target2)));
10378 if (second_probability >= 0)
10379 REG_NOTES (i)
10380 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10381 GEN_INT (second_probability),
10382 REG_NOTES (i));
10384 if (label != NULL_RTX)
10385 emit_label (label);
10389 ix86_expand_setcc (enum rtx_code code, rtx dest)
10391 rtx ret, tmp, tmpreg, equiv;
10392 rtx second_test, bypass_test;
10394 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10395 return 0; /* FAIL */
10397 gcc_assert (GET_MODE (dest) == QImode);
10399 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10400 PUT_MODE (ret, QImode);
10402 tmp = dest;
10403 tmpreg = dest;
10405 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10406 if (bypass_test || second_test)
10408 rtx test = second_test;
10409 int bypass = 0;
10410 rtx tmp2 = gen_reg_rtx (QImode);
10411 if (bypass_test)
10413 gcc_assert (!second_test);
10414 test = bypass_test;
10415 bypass = 1;
10416 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10418 PUT_MODE (test, QImode);
10419 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10421 if (bypass)
10422 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10423 else
10424 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10427 /* Attach a REG_EQUAL note describing the comparison result. */
10428 if (ix86_compare_op0 && ix86_compare_op1)
10430 equiv = simplify_gen_relational (code, QImode,
10431 GET_MODE (ix86_compare_op0),
10432 ix86_compare_op0, ix86_compare_op1);
10433 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10436 return 1; /* DONE */
10439 /* Expand a comparison setting or clearing the carry flag. Return true when
10440 successful, storing the resulting comparison in *POP. */
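/* For example, "a == 0" is rewritten below as the unsigned test "a < 1",
   and an unsigned "a > 41" as "a >= 42", so that the whole predicate ends
   up in the carry flag where adc/sbb style sequences can consume it.  */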
10441 static bool
10442 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10444 enum machine_mode mode =
10445 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10447 /* Do not handle DImode compares that go through the special path. Also we
10448 can't deal with FP compares yet. This is possible to add. */
10449 if (mode == (TARGET_64BIT ? TImode : DImode))
10450 return false;
10451 if (FLOAT_MODE_P (mode))
10453 rtx second_test = NULL, bypass_test = NULL;
10454 rtx compare_op, compare_seq;
10456 /* Shortcut: the following common codes never translate into carry flag compares. */
10457 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10458 || code == ORDERED || code == UNORDERED)
10459 return false;
10461 /* These comparisons require the zero flag; swap the operands so they won't. */
10462 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10463 && !TARGET_IEEE_FP)
10465 rtx tmp = op0;
10466 op0 = op1;
10467 op1 = tmp;
10468 code = swap_condition (code);
10471 /* Try to expand the comparison and verify that we end up with a carry flag
10472 based comparison. This fails to be true only when we decide to expand the
10473 comparison using arithmetic, which is not a common scenario. */
10474 start_sequence ();
10475 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10476 &second_test, &bypass_test);
10477 compare_seq = get_insns ();
10478 end_sequence ();
10480 if (second_test || bypass_test)
10481 return false;
10482 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10483 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10484 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10485 else
10486 code = GET_CODE (compare_op);
10487 if (code != LTU && code != GEU)
10488 return false;
10489 emit_insn (compare_seq);
10490 *pop = compare_op;
10491 return true;
10493 if (!INTEGRAL_MODE_P (mode))
10494 return false;
10495 switch (code)
10497 case LTU:
10498 case GEU:
10499 break;
10501 /* Convert a==0 into (unsigned)a<1. */
10502 case EQ:
10503 case NE:
10504 if (op1 != const0_rtx)
10505 return false;
10506 op1 = const1_rtx;
10507 code = (code == EQ ? LTU : GEU);
10508 break;
10510 /* Convert a>b into b<a or a>=b-1. */
10511 case GTU:
10512 case LEU:
10513 if (GET_CODE (op1) == CONST_INT)
10515 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10516 /* Bail out on overflow. We can still swap the operands but that
10517 would force loading the constant into a register. */
10518 if (op1 == const0_rtx
10519 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10520 return false;
10521 code = (code == GTU ? GEU : LTU);
10523 else
10525 rtx tmp = op1;
10526 op1 = op0;
10527 op0 = tmp;
10528 code = (code == GTU ? LTU : GEU);
10530 break;
10532 /* Convert a>=0 into (unsigned)a<0x80000000. */
10533 case LT:
10534 case GE:
10535 if (mode == DImode || op1 != const0_rtx)
10536 return false;
10537 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10538 code = (code == LT ? GEU : LTU);
10539 break;
10540 case LE:
10541 case GT:
10542 if (mode == DImode || op1 != constm1_rtx)
10543 return false;
10544 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10545 code = (code == LE ? GEU : LTU);
10546 break;
10548 default:
10549 return false;
10551 /* Swapping operands may cause a constant to appear as the first operand. */
10552 if (!nonimmediate_operand (op0, VOIDmode))
10554 if (no_new_pseudos)
10555 return false;
10556 op0 = force_reg (mode, op0);
10558 ix86_compare_op0 = op0;
10559 ix86_compare_op1 = op1;
10560 *pop = ix86_expand_compare (code, NULL, NULL);
10561 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10562 return true;
10566 ix86_expand_int_movcc (rtx operands[])
10568 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10569 rtx compare_seq, compare_op;
10570 rtx second_test, bypass_test;
10571 enum machine_mode mode = GET_MODE (operands[0]);
10572 bool sign_bit_compare_p = false;
10574 start_sequence ();
10575 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10576 compare_seq = get_insns ();
10577 end_sequence ();
10579 compare_code = GET_CODE (compare_op);
10581 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10582 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10583 sign_bit_compare_p = true;
10585 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10586 HImode insns, we'd be swallowed in word prefix ops. */
10588 if ((mode != HImode || TARGET_FAST_PREFIX)
10589 && (mode != (TARGET_64BIT ? TImode : DImode))
10590 && GET_CODE (operands[2]) == CONST_INT
10591 && GET_CODE (operands[3]) == CONST_INT)
10593 rtx out = operands[0];
10594 HOST_WIDE_INT ct = INTVAL (operands[2]);
10595 HOST_WIDE_INT cf = INTVAL (operands[3]);
10596 HOST_WIDE_INT diff;
10598 diff = ct - cf;
10599 /* Sign bit compares are better done using shifts than by using
10600 sbb. */
10601 if (sign_bit_compare_p
10602 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10603 ix86_compare_op1, &compare_op))
10605 /* Detect overlap between destination and compare sources. */
10606 rtx tmp = out;
10608 if (!sign_bit_compare_p)
10610 bool fpcmp = false;
10612 compare_code = GET_CODE (compare_op);
10614 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10615 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10617 fpcmp = true;
10618 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10621 /* To simplify the rest of the code, restrict to the GEU case. */
10622 if (compare_code == LTU)
10624 HOST_WIDE_INT tmp = ct;
10625 ct = cf;
10626 cf = tmp;
10627 compare_code = reverse_condition (compare_code);
10628 code = reverse_condition (code);
10630 else
10632 if (fpcmp)
10633 PUT_CODE (compare_op,
10634 reverse_condition_maybe_unordered
10635 (GET_CODE (compare_op)));
10636 else
10637 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10639 diff = ct - cf;
10641 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10642 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10643 tmp = gen_reg_rtx (mode);
10645 if (mode == DImode)
10646 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10647 else
10648 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10650 else
10652 if (code == GT || code == GE)
10653 code = reverse_condition (code);
10654 else
10656 HOST_WIDE_INT tmp = ct;
10657 ct = cf;
10658 cf = tmp;
10659 diff = ct - cf;
10661 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10662 ix86_compare_op1, VOIDmode, 0, -1);
10665 if (diff == 1)
10668 * cmpl op0,op1
10669 * sbbl dest,dest
10670 * [addl dest, ct]
10672 * Size 5 - 8.
10674 if (ct)
10675 tmp = expand_simple_binop (mode, PLUS,
10676 tmp, GEN_INT (ct),
10677 copy_rtx (tmp), 1, OPTAB_DIRECT);
10679 else if (cf == -1)
10682 * cmpl op0,op1
10683 * sbbl dest,dest
10684 * orl $ct, dest
10686 * Size 8.
10688 tmp = expand_simple_binop (mode, IOR,
10689 tmp, GEN_INT (ct),
10690 copy_rtx (tmp), 1, OPTAB_DIRECT);
10692 else if (diff == -1 && ct)
10695 * cmpl op0,op1
10696 * sbbl dest,dest
10697 * notl dest
10698 * [addl dest, cf]
10700 * Size 8 - 11.
10702 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10703 if (cf)
10704 tmp = expand_simple_binop (mode, PLUS,
10705 copy_rtx (tmp), GEN_INT (cf),
10706 copy_rtx (tmp), 1, OPTAB_DIRECT);
10708 else
10711 * cmpl op0,op1
10712 * sbbl dest,dest
10713 * [notl dest]
10714 * andl cf - ct, dest
10715 * [addl dest, ct]
10717 * Size 8 - 11.
10720 if (cf == 0)
10722 cf = ct;
10723 ct = 0;
10724 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10727 tmp = expand_simple_binop (mode, AND,
10728 copy_rtx (tmp),
10729 gen_int_mode (cf - ct, mode),
10730 copy_rtx (tmp), 1, OPTAB_DIRECT);
10731 if (ct)
10732 tmp = expand_simple_binop (mode, PLUS,
10733 copy_rtx (tmp), GEN_INT (ct),
10734 copy_rtx (tmp), 1, OPTAB_DIRECT);
10737 if (!rtx_equal_p (tmp, out))
10738 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10740 return 1; /* DONE */
10743 if (diff < 0)
10745 HOST_WIDE_INT tmp;
10746 tmp = ct, ct = cf, cf = tmp;
10747 diff = -diff;
10748 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10750 /* We may be reversing an unordered compare to a normal compare, which
10751 is not valid in general (we may convert a non-trapping condition
10752 to a trapping one); however, on i386 we currently emit all
10753 comparisons unordered. */
10754 compare_code = reverse_condition_maybe_unordered (compare_code);
10755 code = reverse_condition_maybe_unordered (code);
10757 else
10759 compare_code = reverse_condition (compare_code);
10760 code = reverse_condition (code);
10764 compare_code = UNKNOWN;
10765 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10766 && GET_CODE (ix86_compare_op1) == CONST_INT)
10768 if (ix86_compare_op1 == const0_rtx
10769 && (code == LT || code == GE))
10770 compare_code = code;
10771 else if (ix86_compare_op1 == constm1_rtx)
10773 if (code == LE)
10774 compare_code = LT;
10775 else if (code == GT)
10776 compare_code = GE;
10780 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10781 if (compare_code != UNKNOWN
10782 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10783 && (cf == -1 || ct == -1))
10785 /* If lea code below could be used, only optimize
10786 if it results in a 2 insn sequence. */
10788 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10789 || diff == 3 || diff == 5 || diff == 9)
10790 || (compare_code == LT && ct == -1)
10791 || (compare_code == GE && cf == -1))
10794 * notl op1 (if necessary)
10795 * sarl $31, op1
10796 * orl cf, op1
10798 if (ct != -1)
10800 cf = ct;
10801 ct = -1;
10802 code = reverse_condition (code);
10805 out = emit_store_flag (out, code, ix86_compare_op0,
10806 ix86_compare_op1, VOIDmode, 0, -1);
10808 out = expand_simple_binop (mode, IOR,
10809 out, GEN_INT (cf),
10810 out, 1, OPTAB_DIRECT);
10811 if (out != operands[0])
10812 emit_move_insn (operands[0], out);
10814 return 1; /* DONE */
10819 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10820 || diff == 3 || diff == 5 || diff == 9)
10821 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10822 && (mode != DImode
10823 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10826 * xorl dest,dest
10827 * cmpl op1,op2
10828 * setcc dest
10829 * lea cf(dest*(ct-cf)),dest
10831 * Size 14.
10833 * This also catches the degenerate setcc-only case.
10836 rtx tmp;
10837 int nops;
10839 out = emit_store_flag (out, code, ix86_compare_op0,
10840 ix86_compare_op1, VOIDmode, 0, 1);
10842 nops = 0;
10843 /* On x86_64 the lea instruction operates on Pmode, so we need
10844 to get the arithmetic done in the proper mode to match. */
10845 if (diff == 1)
10846 tmp = copy_rtx (out);
10847 else
10849 rtx out1;
10850 out1 = copy_rtx (out);
10851 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10852 nops++;
10853 if (diff & 1)
10855 tmp = gen_rtx_PLUS (mode, tmp, out1);
10856 nops++;
10859 if (cf != 0)
10861 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10862 nops++;
10864 if (!rtx_equal_p (tmp, out))
10866 if (nops == 1)
10867 out = force_operand (tmp, copy_rtx (out));
10868 else
10869 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10871 if (!rtx_equal_p (out, operands[0]))
10872 emit_move_insn (operands[0], copy_rtx (out));
10874 return 1; /* DONE */
10878 * General case: Jumpful:
10879 * xorl dest,dest cmpl op1, op2
10880 * cmpl op1, op2 movl ct, dest
10881 * setcc dest jcc 1f
10882 * decl dest movl cf, dest
10883 * andl (cf-ct),dest 1:
10884 * addl ct,dest
10886 * Size 20. Size 14.
10888 * This is reasonably steep, but branch mispredict costs are
10889 * high on modern cpus, so consider failing only if optimizing
10890 * for space.
10893 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10894 && BRANCH_COST >= 2)
10896 if (cf == 0)
10898 cf = ct;
10899 ct = 0;
10900 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10901 /* We may be reversing an unordered compare to a normal compare,
10902 which is not valid in general (we may convert a non-trapping
10903 condition to a trapping one); however, on i386 we currently
10904 emit all comparisons unordered. */
10905 code = reverse_condition_maybe_unordered (code);
10906 else
10908 code = reverse_condition (code);
10909 if (compare_code != UNKNOWN)
10910 compare_code = reverse_condition (compare_code);
10914 if (compare_code != UNKNOWN)
10916 /* notl op1 (if needed)
10917 sarl $31, op1
10918 andl (cf-ct), op1
10919 addl ct, op1
10921 For x < 0 (resp. x <= -1) there will be no notl,
10922 so if possible swap the constants to get rid of the
10923 complement.
10924 True/false will be -1/0 while code below (store flag
10925 followed by decrement) is 0/-1, so the constants need
10926 to be exchanged once more. */
10928 if (compare_code == GE || !cf)
10930 code = reverse_condition (code);
10931 compare_code = LT;
10933 else
10935 HOST_WIDE_INT tmp = cf;
10936 cf = ct;
10937 ct = tmp;
10940 out = emit_store_flag (out, code, ix86_compare_op0,
10941 ix86_compare_op1, VOIDmode, 0, -1);
10943 else
10945 out = emit_store_flag (out, code, ix86_compare_op0,
10946 ix86_compare_op1, VOIDmode, 0, 1);
10948 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10949 copy_rtx (out), 1, OPTAB_DIRECT);
10952 out = expand_simple_binop (mode, AND, copy_rtx (out),
10953 gen_int_mode (cf - ct, mode),
10954 copy_rtx (out), 1, OPTAB_DIRECT);
10955 if (ct)
10956 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10957 copy_rtx (out), 1, OPTAB_DIRECT);
10958 if (!rtx_equal_p (out, operands[0]))
10959 emit_move_insn (operands[0], copy_rtx (out));
10961 return 1; /* DONE */
10965 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10967 /* Try a few more things with specific constants and a variable. */
10969 optab op;
10970 rtx var, orig_out, out, tmp;
10972 if (BRANCH_COST <= 2)
10973 return 0; /* FAIL */
10975 /* If one of the two operands is an interesting constant, load a
10976 constant with the above and mask it in with a logical operation. */
10978 if (GET_CODE (operands[2]) == CONST_INT)
10980 var = operands[3];
10981 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10982 operands[3] = constm1_rtx, op = and_optab;
10983 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10984 operands[3] = const0_rtx, op = ior_optab;
10985 else
10986 return 0; /* FAIL */
10988 else if (GET_CODE (operands[3]) == CONST_INT)
10990 var = operands[2];
10991 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10992 operands[2] = constm1_rtx, op = and_optab;
10993 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10994 operands[2] = const0_rtx, op = ior_optab;
10995 else
10996 return 0; /* FAIL */
10998 else
10999 return 0; /* FAIL */
11001 orig_out = operands[0];
11002 tmp = gen_reg_rtx (mode);
11003 operands[0] = tmp;
11005 /* Recurse to get the constant loaded. */
11006 if (ix86_expand_int_movcc (operands) == 0)
11007 return 0; /* FAIL */
11009 /* Mask in the interesting variable. */
11010 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11011 OPTAB_WIDEN);
11012 if (!rtx_equal_p (out, orig_out))
11013 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11015 return 1; /* DONE */
11019 * For comparison with above,
11021 * movl cf,dest
11022 * movl ct,tmp
11023 * cmpl op1,op2
11024 * cmovcc tmp,dest
11026 * Size 15.
11029 if (! nonimmediate_operand (operands[2], mode))
11030 operands[2] = force_reg (mode, operands[2]);
11031 if (! nonimmediate_operand (operands[3], mode))
11032 operands[3] = force_reg (mode, operands[3]);
11034 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11036 rtx tmp = gen_reg_rtx (mode);
11037 emit_move_insn (tmp, operands[3]);
11038 operands[3] = tmp;
11040 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11042 rtx tmp = gen_reg_rtx (mode);
11043 emit_move_insn (tmp, operands[2]);
11044 operands[2] = tmp;
11047 if (! register_operand (operands[2], VOIDmode)
11048 && (mode == QImode
11049 || ! register_operand (operands[3], VOIDmode)))
11050 operands[2] = force_reg (mode, operands[2]);
11052 if (mode == QImode
11053 && ! register_operand (operands[3], VOIDmode))
11054 operands[3] = force_reg (mode, operands[3]);
11056 emit_insn (compare_seq);
11057 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11058 gen_rtx_IF_THEN_ELSE (mode,
11059 compare_op, operands[2],
11060 operands[3])));
11061 if (bypass_test)
11062 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11063 gen_rtx_IF_THEN_ELSE (mode,
11064 bypass_test,
11065 copy_rtx (operands[3]),
11066 copy_rtx (operands[0]))));
11067 if (second_test)
11068 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11069 gen_rtx_IF_THEN_ELSE (mode,
11070 second_test,
11071 copy_rtx (operands[2]),
11072 copy_rtx (operands[0]))));
11074 return 1; /* DONE */
11077 /* Swap, force into registers, or otherwise massage the two operands
11078 to an sse comparison with a mask result. Thus we differ a bit from
11079 ix86_prepare_fp_compare_args which expects to produce a flags result.
11081 The DEST operand exists to help determine whether to commute commutative
11082 operators. The POP0/POP1 operands are updated in place. The new
11083 comparison code is returned, or UNKNOWN if not implementable. */
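/* For example, GT has no direct SSE compare encoding, so the operands are
   swapped and LT is used instead; LTGT and UNEQ have no single-mask
   encoding at all and make this return UNKNOWN.  */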
11085 static enum rtx_code
11086 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11087 rtx *pop0, rtx *pop1)
11089 rtx tmp;
11091 switch (code)
11093 case LTGT:
11094 case UNEQ:
11095 /* We have no LTGT as an operator. We could implement it with
11096 NE & ORDERED, but this requires an extra temporary. It's
11097 not clear that it's worth it. */
11098 return UNKNOWN;
11100 case LT:
11101 case LE:
11102 case UNGT:
11103 case UNGE:
11104 /* These are supported directly. */
11105 break;
11107 case EQ:
11108 case NE:
11109 case UNORDERED:
11110 case ORDERED:
11111 /* For commutative operators, try to canonicalize the destination
11112 operand to be first in the comparison - this helps reload to
11113 avoid extra moves. */
11114 if (!dest || !rtx_equal_p (dest, *pop1))
11115 break;
11116 /* FALLTHRU */
11118 case GE:
11119 case GT:
11120 case UNLE:
11121 case UNLT:
11122 /* These are not supported directly. Swap the comparison operands
11123 to transform into something that is supported. */
11124 tmp = *pop0;
11125 *pop0 = *pop1;
11126 *pop1 = tmp;
11127 code = swap_condition (code);
11128 break;
11130 default:
11131 gcc_unreachable ();
11134 return code;
11137 /* Detect conditional moves that exactly match min/max operational
11138 semantics. Note that this is IEEE safe, as long as we don't
11139 interchange the operands.
11141 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11142 and TRUE if the operation is successful and instructions are emitted. */
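/* For example, "a < b ? a : b" is recognized as a minimum.  With both
   -ffinite-math-only and -funsafe-math-optimizations in effect it becomes
   a plain SMIN; otherwise it is emitted as UNSPEC_IEEE_MIN, which keeps
   the operand order and is therefore safe for NaNs and signed zeros.  */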
11144 static bool
11145 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11146 rtx cmp_op1, rtx if_true, rtx if_false)
11148 enum machine_mode mode;
11149 bool is_min;
11150 rtx tmp;
11152 if (code == LT)
11154 else if (code == UNGE)
11156 tmp = if_true;
11157 if_true = if_false;
11158 if_false = tmp;
11160 else
11161 return false;
11163 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11164 is_min = true;
11165 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11166 is_min = false;
11167 else
11168 return false;
11170 mode = GET_MODE (dest);
11172 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11173 but MODE may be a vector mode and thus not appropriate. */
11174 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11176 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11177 rtvec v;
11179 if_true = force_reg (mode, if_true);
11180 v = gen_rtvec (2, if_true, if_false);
11181 tmp = gen_rtx_UNSPEC (mode, v, u);
11183 else
11185 code = is_min ? SMIN : SMAX;
11186 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11189 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11190 return true;
11193 /* Expand an sse vector comparison. Return the register with the result. */
11195 static rtx
11196 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11197 rtx op_true, rtx op_false)
11199 enum machine_mode mode = GET_MODE (dest);
11200 rtx x;
11202 cmp_op0 = force_reg (mode, cmp_op0);
11203 if (!nonimmediate_operand (cmp_op1, mode))
11204 cmp_op1 = force_reg (mode, cmp_op1);
11206 if (optimize
11207 || reg_overlap_mentioned_p (dest, op_true)
11208 || reg_overlap_mentioned_p (dest, op_false))
11209 dest = gen_reg_rtx (mode);
11211 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11214 return dest;
11217 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11218 operations. This is used for both scalar and vector conditional moves. */
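/* In the general case this computes, in vector terms,
     dest = (cmp & op_true) | (~cmp & op_false)
   i.e. roughly an and/andnot/or sequence; a constant-zero arm collapses
   it to a single masking operation.  */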
11220 static void
11221 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11223 enum machine_mode mode = GET_MODE (dest);
11224 rtx t2, t3, x;
11226 if (op_false == CONST0_RTX (mode))
11228 op_true = force_reg (mode, op_true);
11229 x = gen_rtx_AND (mode, cmp, op_true);
11230 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11232 else if (op_true == CONST0_RTX (mode))
11234 op_false = force_reg (mode, op_false);
11235 x = gen_rtx_NOT (mode, cmp);
11236 x = gen_rtx_AND (mode, x, op_false);
11237 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11239 else
11241 op_true = force_reg (mode, op_true);
11242 op_false = force_reg (mode, op_false);
11244 t2 = gen_reg_rtx (mode);
11245 if (optimize)
11246 t3 = gen_reg_rtx (mode);
11247 else
11248 t3 = dest;
11250 x = gen_rtx_AND (mode, op_true, cmp);
11251 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11253 x = gen_rtx_NOT (mode, cmp);
11254 x = gen_rtx_AND (mode, x, op_false);
11255 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11257 x = gen_rtx_IOR (mode, t3, t2);
11258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11262 /* Expand a floating-point conditional move. Return true if successful. */
11265 ix86_expand_fp_movcc (rtx operands[])
11267 enum machine_mode mode = GET_MODE (operands[0]);
11268 enum rtx_code code = GET_CODE (operands[1]);
11269 rtx tmp, compare_op, second_test, bypass_test;
11271 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11273 enum machine_mode cmode;
11275 /* Since we've no cmove for sse registers, don't force bad register
11276 allocation just to gain access to it. Deny movcc when the
11277 comparison mode doesn't match the move mode. */
11278 cmode = GET_MODE (ix86_compare_op0);
11279 if (cmode == VOIDmode)
11280 cmode = GET_MODE (ix86_compare_op1);
11281 if (cmode != mode)
11282 return 0;
11284 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11285 &ix86_compare_op0,
11286 &ix86_compare_op1);
11287 if (code == UNKNOWN)
11288 return 0;
11290 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11291 ix86_compare_op1, operands[2],
11292 operands[3]))
11293 return 1;
11295 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11296 ix86_compare_op1, operands[2], operands[3]);
11297 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11298 return 1;
11301 /* The floating point conditional move instructions don't directly
11302 support conditions resulting from a signed integer comparison. */
11304 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11306 /* The floating point conditional move instructions don't directly
11307 support signed integer comparisons. */
11309 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11311 gcc_assert (!second_test && !bypass_test);
11312 tmp = gen_reg_rtx (QImode);
11313 ix86_expand_setcc (code, tmp);
11314 code = NE;
11315 ix86_compare_op0 = tmp;
11316 ix86_compare_op1 = const0_rtx;
11317 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11319 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11321 tmp = gen_reg_rtx (mode);
11322 emit_move_insn (tmp, operands[3]);
11323 operands[3] = tmp;
11325 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11327 tmp = gen_reg_rtx (mode);
11328 emit_move_insn (tmp, operands[2]);
11329 operands[2] = tmp;
11332 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11333 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11334 operands[2], operands[3])));
11335 if (bypass_test)
11336 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11337 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11338 operands[3], operands[0])));
11339 if (second_test)
11340 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11341 gen_rtx_IF_THEN_ELSE (mode, second_test,
11342 operands[2], operands[0])));
11344 return 1;
11347 /* Expand a floating-point vector conditional move; a vcond operation
11348 rather than a movcc operation. */
11350 bool
11351 ix86_expand_fp_vcond (rtx operands[])
11353 enum rtx_code code = GET_CODE (operands[3]);
11354 rtx cmp;
11356 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11357 &operands[4], &operands[5]);
11358 if (code == UNKNOWN)
11359 return false;
11361 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11362 operands[5], operands[1], operands[2]))
11363 return true;
11365 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11366 operands[1], operands[2]);
11367 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11368 return true;
11371 /* Expand a signed integral vector conditional move. */
11373 bool
11374 ix86_expand_int_vcond (rtx operands[])
11376 enum machine_mode mode = GET_MODE (operands[0]);
11377 enum rtx_code code = GET_CODE (operands[3]);
11378 bool negate = false;
11379 rtx x, cop0, cop1;
11381 cop0 = operands[4];
11382 cop1 = operands[5];
11384 /* Canonicalize the comparison to EQ, GT, GTU. */
11385 switch (code)
11387 case EQ:
11388 case GT:
11389 case GTU:
11390 break;
11392 case NE:
11393 case LE:
11394 case LEU:
11395 code = reverse_condition (code);
11396 negate = true;
11397 break;
11399 case GE:
11400 case GEU:
11401 code = reverse_condition (code);
11402 negate = true;
11403 /* FALLTHRU */
11405 case LT:
11406 case LTU:
11407 code = swap_condition (code);
11408 x = cop0, cop0 = cop1, cop1 = x;
11409 break;
11411 default:
11412 gcc_unreachable ();
11415 /* Unsigned parallel compare is not supported by the hardware. Play some
11416 tricks to turn this into a signed comparison against 0. */
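  /* For V16QImode and V8HImode, for instance, an unsigned "a > b" is
     rewritten below as "(a -us b) != 0": the unsigned saturating subtract
     is zero exactly when a <= b, so an EQ test against zero with the
     selected arms swapped yields the GTU result.  */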
11417 if (code == GTU)
11419 switch (mode)
11421 case V4SImode:
11423 rtx t1, t2, mask;
11425 /* Perform a parallel modulo subtraction. */
11426 t1 = gen_reg_rtx (mode);
11427 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11429 /* Extract the original sign bit of op0. */
11430 mask = GEN_INT (-0x80000000);
11431 mask = gen_rtx_CONST_VECTOR (mode,
11432 gen_rtvec (4, mask, mask, mask, mask));
11433 mask = force_reg (mode, mask);
11434 t2 = gen_reg_rtx (mode);
11435 emit_insn (gen_andv4si3 (t2, cop0, mask));
11437 /* XOR it back into the result of the subtraction. This results
11438 in the sign bit set iff we saw unsigned underflow. */
11439 x = gen_reg_rtx (mode);
11440 emit_insn (gen_xorv4si3 (x, t1, t2));
11442 code = GT;
11444 break;
11446 case V16QImode:
11447 case V8HImode:
11448 /* Perform a parallel unsigned saturating subtraction. */
11449 x = gen_reg_rtx (mode);
11450 emit_insn (gen_rtx_SET (VOIDmode, x,
11451 gen_rtx_US_MINUS (mode, cop0, cop1)));
11453 code = EQ;
11454 negate = !negate;
11455 break;
11457 default:
11458 gcc_unreachable ();
11461 cop0 = x;
11462 cop1 = CONST0_RTX (mode);
11465 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11466 operands[1+negate], operands[2-negate]);
11468 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11469 operands[2-negate]);
11470 return true;
11473 /* Expand conditional increment or decrement using adc/sbb instructions.
11474 The default case using setcc followed by the conditional move can be
11475 done by generic code. */
11477 ix86_expand_int_addcc (rtx operands[])
11479 enum rtx_code code = GET_CODE (operands[1]);
11480 rtx compare_op;
11481 rtx val = const0_rtx;
11482 bool fpcmp = false;
11483 enum machine_mode mode = GET_MODE (operands[0]);
11485 if (operands[3] != const1_rtx
11486 && operands[3] != constm1_rtx)
11487 return 0;
11488 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11489 ix86_compare_op1, &compare_op))
11490 return 0;
11491 code = GET_CODE (compare_op);
11493 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11494 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11496 fpcmp = true;
11497 code = ix86_fp_compare_code_to_integer (code);
11500 if (code != LTU)
11502 val = constm1_rtx;
11503 if (fpcmp)
11504 PUT_CODE (compare_op,
11505 reverse_condition_maybe_unordered
11506 (GET_CODE (compare_op)));
11507 else
11508 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11510 PUT_MODE (compare_op, mode);
11512 /* Construct either adc or sbb insn. */
11513 if ((code == LTU) == (operands[3] == constm1_rtx))
11515 switch (GET_MODE (operands[0]))
11517 case QImode:
11518 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11519 break;
11520 case HImode:
11521 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11522 break;
11523 case SImode:
11524 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11525 break;
11526 case DImode:
11527 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11528 break;
11529 default:
11530 gcc_unreachable ();
11533 else
11535 switch (GET_MODE (operands[0]))
11537 case QImode:
11538 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11539 break;
11540 case HImode:
11541 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11542 break;
11543 case SImode:
11544 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11545 break;
11546 case DImode:
11547 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11548 break;
11549 default:
11550 gcc_unreachable ();
11553 return 1; /* DONE */
11557 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11558 works for floating point parameters and non-offsettable memories.
11559 For pushes, it returns just stack offsets; the values will be saved
11560 in the right order. Maximally three parts are generated. */
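/* Illustrative examples of the resulting split (as implemented below):
   on a 32-bit target a DImode or DFmode operand yields two SImode
   parts and an XFmode operand yields three, while a CONST_DOUBLE is
   decomposed into one gen_int_mode immediate per part.  */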
11562 static int
11563 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11565 int size;
11567 if (!TARGET_64BIT)
11568 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11569 else
11570 size = (GET_MODE_SIZE (mode) + 4) / 8;
11572 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11573 gcc_assert (size >= 2 && size <= 3);
11575 /* Optimize constant pool reference to immediates. This is used by fp
11576 moves, that force all constants to memory to allow combining. */
11577 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11579 rtx tmp = maybe_get_pool_constant (operand);
11580 if (tmp)
11581 operand = tmp;
11584 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11586 /* The only non-offsettable memories we handle are pushes. */
11587 int ok = push_operand (operand, VOIDmode);
11589 gcc_assert (ok);
11591 operand = copy_rtx (operand);
11592 PUT_MODE (operand, Pmode);
11593 parts[0] = parts[1] = parts[2] = operand;
11594 return size;
11597 if (GET_CODE (operand) == CONST_VECTOR)
11599 enum machine_mode imode = int_mode_for_mode (mode);
11600 /* Caution: if we looked through a constant pool memory above,
11601 the operand may actually have a different mode now. That's
11602 ok, since we want to pun this all the way back to an integer. */
11603 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11604 gcc_assert (operand != NULL);
11605 mode = imode;
11608 if (!TARGET_64BIT)
11610 if (mode == DImode)
11611 split_di (&operand, 1, &parts[0], &parts[1]);
11612 else
11614 if (REG_P (operand))
11616 gcc_assert (reload_completed);
11617 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11618 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11619 if (size == 3)
11620 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11622 else if (offsettable_memref_p (operand))
11624 operand = adjust_address (operand, SImode, 0);
11625 parts[0] = operand;
11626 parts[1] = adjust_address (operand, SImode, 4);
11627 if (size == 3)
11628 parts[2] = adjust_address (operand, SImode, 8);
11630 else if (GET_CODE (operand) == CONST_DOUBLE)
11632 REAL_VALUE_TYPE r;
11633 long l[4];
11635 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11636 switch (mode)
11638 case XFmode:
11639 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11640 parts[2] = gen_int_mode (l[2], SImode);
11641 break;
11642 case DFmode:
11643 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11644 break;
11645 default:
11646 gcc_unreachable ();
11648 parts[1] = gen_int_mode (l[1], SImode);
11649 parts[0] = gen_int_mode (l[0], SImode);
11651 else
11652 gcc_unreachable ();
11655 else
11657 if (mode == TImode)
11658 split_ti (&operand, 1, &parts[0], &parts[1]);
11659 if (mode == XFmode || mode == TFmode)
11661 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11662 if (REG_P (operand))
11664 gcc_assert (reload_completed);
11665 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11666 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11668 else if (offsettable_memref_p (operand))
11670 operand = adjust_address (operand, DImode, 0);
11671 parts[0] = operand;
11672 parts[1] = adjust_address (operand, upper_mode, 8);
11674 else if (GET_CODE (operand) == CONST_DOUBLE)
11676 REAL_VALUE_TYPE r;
11677 long l[4];
11679 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11680 real_to_target (l, &r, mode);
11682 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11683 if (HOST_BITS_PER_WIDE_INT >= 64)
11684 parts[0]
11685 = gen_int_mode
11686 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11687 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11688 DImode);
11689 else
11690 parts[0] = immed_double_const (l[0], l[1], DImode);
11692 if (upper_mode == SImode)
11693 parts[1] = gen_int_mode (l[2], SImode);
11694 else if (HOST_BITS_PER_WIDE_INT >= 64)
11695 parts[1]
11696 = gen_int_mode
11697 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11698 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11699 DImode);
11700 else
11701 parts[1] = immed_double_const (l[2], l[3], DImode);
11703 else
11704 gcc_unreachable ();
11708 return size;
11711 /* Emit insns to perform a move or push of DI, DF, and XF values;
11712 all the required insns are emitted here.
11713 Operands 2-4 contain the input values
11714 in the correct order; operands 5-7 contain the output values. */
11716 void
11717 ix86_split_long_move (rtx operands[])
11719 rtx part[2][3];
11720 int nparts;
11721 int push = 0;
11722 int collisions = 0;
11723 enum machine_mode mode = GET_MODE (operands[0]);
11725 /* The DFmode expanders may ask us to move a double.
11726 For a 64-bit target this is a single move. By hiding that fact
11727 here we simplify the i386.md splitters. */
11728 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11730 /* Optimize constant pool reference to immediates. This is used by
11731 fp moves, that force all constants to memory to allow combining. */
11733 if (GET_CODE (operands[1]) == MEM
11734 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11735 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11736 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11737 if (push_operand (operands[0], VOIDmode))
11739 operands[0] = copy_rtx (operands[0]);
11740 PUT_MODE (operands[0], Pmode);
11742 else
11743 operands[0] = gen_lowpart (DImode, operands[0]);
11744 operands[1] = gen_lowpart (DImode, operands[1]);
11745 emit_move_insn (operands[0], operands[1]);
11746 return;
11749 /* The only non-offsettable memory we handle is push. */
11750 if (push_operand (operands[0], VOIDmode))
11751 push = 1;
11752 else
11753 gcc_assert (GET_CODE (operands[0]) != MEM
11754 || offsettable_memref_p (operands[0]));
11756 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11757 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11759 /* When emitting a push, be careful with source operands on the stack. */
11760 if (push && GET_CODE (operands[1]) == MEM
11761 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11763 if (nparts == 3)
11764 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11765 XEXP (part[1][2], 0));
11766 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11767 XEXP (part[1][1], 0));
11770 /* We need to do the copy in the right order in case an address register
11771 of the source overlaps the destination. */
11772 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11774 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11775 collisions++;
11776 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11777 collisions++;
11778 if (nparts == 3
11779 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11780 collisions++;
11782 /* Collision in the middle part can be handled by reordering. */
11783 if (collisions == 1 && nparts == 3
11784 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11786 rtx tmp;
11787 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11788 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11791 /* If there are more collisions, we can't handle it by reordering.
11792 Do an lea to the last part and use only one colliding move. */
11793 else if (collisions > 1)
11795 rtx base;
11797 collisions = 1;
11799 base = part[0][nparts - 1];
11801 /* Handle the case when the last part isn't valid for lea.
11802 Happens in 64-bit mode storing the 12-byte XFmode. */
11803 if (GET_MODE (base) != Pmode)
11804 base = gen_rtx_REG (Pmode, REGNO (base));
11806 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11807 part[1][0] = replace_equiv_address (part[1][0], base);
11808 part[1][1] = replace_equiv_address (part[1][1],
11809 plus_constant (base, UNITS_PER_WORD));
11810 if (nparts == 3)
11811 part[1][2] = replace_equiv_address (part[1][2],
11812 plus_constant (base, 8));
11816 if (push)
11818 if (!TARGET_64BIT)
11820 if (nparts == 3)
11822 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11823 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11824 emit_move_insn (part[0][2], part[1][2]);
11827 else
11829 /* In 64-bit mode we don't have a 32-bit push available. If this is a
11830 register, that is OK - we will just use the larger counterpart. We also
11831 retype memory - this comes from an attempt to avoid a REX prefix on
11832 moving the second half of a TFmode value. */
11833 if (GET_MODE (part[1][1]) == SImode)
11835 switch (GET_CODE (part[1][1]))
11837 case MEM:
11838 part[1][1] = adjust_address (part[1][1], DImode, 0);
11839 break;
11841 case REG:
11842 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11843 break;
11845 default:
11846 gcc_unreachable ();
11849 if (GET_MODE (part[1][0]) == SImode)
11850 part[1][0] = part[1][1];
11853 emit_move_insn (part[0][1], part[1][1]);
11854 emit_move_insn (part[0][0], part[1][0]);
11855 return;
11858 /* Choose the correct order so we do not overwrite the source before it is copied. */
11859 if ((REG_P (part[0][0])
11860 && REG_P (part[1][1])
11861 && (REGNO (part[0][0]) == REGNO (part[1][1])
11862 || (nparts == 3
11863 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11864 || (collisions > 0
11865 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11867 if (nparts == 3)
11869 operands[2] = part[0][2];
11870 operands[3] = part[0][1];
11871 operands[4] = part[0][0];
11872 operands[5] = part[1][2];
11873 operands[6] = part[1][1];
11874 operands[7] = part[1][0];
11876 else
11878 operands[2] = part[0][1];
11879 operands[3] = part[0][0];
11880 operands[5] = part[1][1];
11881 operands[6] = part[1][0];
11884 else
11886 if (nparts == 3)
11888 operands[2] = part[0][0];
11889 operands[3] = part[0][1];
11890 operands[4] = part[0][2];
11891 operands[5] = part[1][0];
11892 operands[6] = part[1][1];
11893 operands[7] = part[1][2];
11895 else
11897 operands[2] = part[0][0];
11898 operands[3] = part[0][1];
11899 operands[5] = part[1][0];
11900 operands[6] = part[1][1];
11904 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11905 if (optimize_size)
11907 if (GET_CODE (operands[5]) == CONST_INT
11908 && operands[5] != const0_rtx
11909 && REG_P (operands[2]))
11911 if (GET_CODE (operands[6]) == CONST_INT
11912 && INTVAL (operands[6]) == INTVAL (operands[5]))
11913 operands[6] = operands[2];
11915 if (nparts == 3
11916 && GET_CODE (operands[7]) == CONST_INT
11917 && INTVAL (operands[7]) == INTVAL (operands[5]))
11918 operands[7] = operands[2];
11921 if (nparts == 3
11922 && GET_CODE (operands[6]) == CONST_INT
11923 && operands[6] != const0_rtx
11924 && REG_P (operands[3])
11925 && GET_CODE (operands[7]) == CONST_INT
11926 && INTVAL (operands[7]) == INTVAL (operands[6]))
11927 operands[7] = operands[3];
11930 emit_move_insn (operands[2], operands[5]);
11931 emit_move_insn (operands[3], operands[6]);
11932 if (nparts == 3)
11933 emit_move_insn (operands[4], operands[7]);
11935 return;
11938 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11939 left shift by a constant, either using a single shift or
11940 a sequence of add instructions. */
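/* Illustrative example: a shift left by 2 becomes "add x,x; add x,x"
   when two adds are no more expensive than one constant shift, and a
   single shl otherwise.  Note that MODE names the full double-word
   mode being split, so for MODE == DImode the half actually shifted
   here is SImode and the SImode generators are used.  */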
11942 static void
11943 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11945 if (count == 1)
11947 emit_insn ((mode == DImode
11948 ? gen_addsi3
11949 : gen_adddi3) (operand, operand, operand));
11951 else if (!optimize_size
11952 && count * ix86_cost->add <= ix86_cost->shift_const)
11954 int i;
11955 for (i = 0; i < count; i++)
11957 emit_insn ((mode == DImode
11958 ? gen_addsi3
11959 : gen_adddi3) (operand, operand, operand));
11962 else
11963 emit_insn ((mode == DImode
11964 ? gen_ashlsi3
11965 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11968 void
11969 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11971 rtx low[2], high[2];
11972 int count;
11973 const int single_width = mode == DImode ? 32 : 64;
11975 if (GET_CODE (operands[2]) == CONST_INT)
11977 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11978 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11980 if (count >= single_width)
11982 emit_move_insn (high[0], low[1]);
11983 emit_move_insn (low[0], const0_rtx);
11985 if (count > single_width)
11986 ix86_expand_ashl_const (high[0], count - single_width, mode);
11988 else
11990 if (!rtx_equal_p (operands[0], operands[1]))
11991 emit_move_insn (operands[0], operands[1]);
11992 emit_insn ((mode == DImode
11993 ? gen_x86_shld_1
11994 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11995 ix86_expand_ashl_const (low[0], count, mode);
11997 return;
12000 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12002 if (operands[1] == const1_rtx)
12004 /* Assuming we've chosen QImode-capable registers, then 1 << N
12005 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12006 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12008 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12010 ix86_expand_clear (low[0]);
12011 ix86_expand_clear (high[0]);
12012 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12014 d = gen_lowpart (QImode, low[0]);
12015 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12016 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12017 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12019 d = gen_lowpart (QImode, high[0]);
12020 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12021 s = gen_rtx_NE (QImode, flags, const0_rtx);
12022 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12025 /* Otherwise, we can get the same results by manually performing
12026 a bit extract operation on bit 5/6, and then performing the two
12027 shifts. The two methods of getting 0/1 into low/high are exactly
12028 the same size. Avoiding the shift in the bit extract case helps
12029 pentium4 a bit; no one else seems to care much either way. */
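/* Worked example for the DImode (32-bit target) case below: the shift
   count N is in 0..63, so HIGH receives bit 5 of N (1 iff N >= 32) and
   LOW its complement; the two 32-bit shifts that follow rely on the
   hardware truncating the count to 5 bits.  For N = 40: HIGH = 1,
   LOW = 0, and shifting by 40 & 31 = 8 leaves HIGH = 0x100, LOW = 0,
   which is exactly 1 << 40.  */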
12030 else
12032 rtx x;
12034 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12035 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12036 else
12037 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12038 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12040 emit_insn ((mode == DImode
12041 ? gen_lshrsi3
12042 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12043 emit_insn ((mode == DImode
12044 ? gen_andsi3
12045 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12046 emit_move_insn (low[0], high[0]);
12047 emit_insn ((mode == DImode
12048 ? gen_xorsi3
12049 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12052 emit_insn ((mode == DImode
12053 ? gen_ashlsi3
12054 : gen_ashldi3) (low[0], low[0], operands[2]));
12055 emit_insn ((mode == DImode
12056 ? gen_ashlsi3
12057 : gen_ashldi3) (high[0], high[0], operands[2]));
12058 return;
12061 if (operands[1] == constm1_rtx)
12063 /* For -1 << N, we can avoid the shld instruction, because we
12064 know that we're shifting 0...31/63 ones into a -1. */
12065 emit_move_insn (low[0], constm1_rtx);
12066 if (optimize_size)
12067 emit_move_insn (high[0], low[0]);
12068 else
12069 emit_move_insn (high[0], constm1_rtx);
12071 else
12073 if (!rtx_equal_p (operands[0], operands[1]))
12074 emit_move_insn (operands[0], operands[1]);
12076 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12077 emit_insn ((mode == DImode
12078 ? gen_x86_shld_1
12079 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12082 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12084 if (TARGET_CMOVE && scratch)
12086 ix86_expand_clear (scratch);
12087 emit_insn ((mode == DImode
12088 ? gen_x86_shift_adj_1
12089 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12091 else
12092 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12095 void
12096 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12098 rtx low[2], high[2];
12099 int count;
12100 const int single_width = mode == DImode ? 32 : 64;
12102 if (GET_CODE (operands[2]) == CONST_INT)
12104 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12105 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12107 if (count == single_width * 2 - 1)
12109 emit_move_insn (high[0], high[1]);
12110 emit_insn ((mode == DImode
12111 ? gen_ashrsi3
12112 : gen_ashrdi3) (high[0], high[0],
12113 GEN_INT (single_width - 1)));
12114 emit_move_insn (low[0], high[0]);
12117 else if (count >= single_width)
12119 emit_move_insn (low[0], high[1]);
12120 emit_move_insn (high[0], low[0]);
12121 emit_insn ((mode == DImode
12122 ? gen_ashrsi3
12123 : gen_ashrdi3) (high[0], high[0],
12124 GEN_INT (single_width - 1)));
12125 if (count > single_width)
12126 emit_insn ((mode == DImode
12127 ? gen_ashrsi3
12128 : gen_ashrdi3) (low[0], low[0],
12129 GEN_INT (count - single_width)));
12131 else
12133 if (!rtx_equal_p (operands[0], operands[1]))
12134 emit_move_insn (operands[0], operands[1]);
12135 emit_insn ((mode == DImode
12136 ? gen_x86_shrd_1
12137 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12138 emit_insn ((mode == DImode
12139 ? gen_ashrsi3
12140 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12143 else
12145 if (!rtx_equal_p (operands[0], operands[1]))
12146 emit_move_insn (operands[0], operands[1]);
12148 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12150 emit_insn ((mode == DImode
12151 ? gen_x86_shrd_1
12152 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12153 emit_insn ((mode == DImode
12154 ? gen_ashrsi3
12155 : gen_ashrdi3) (high[0], high[0], operands[2]));
12157 if (TARGET_CMOVE && scratch)
12159 emit_move_insn (scratch, high[0]);
12160 emit_insn ((mode == DImode
12161 ? gen_ashrsi3
12162 : gen_ashrdi3) (scratch, scratch,
12163 GEN_INT (single_width - 1)));
12164 emit_insn ((mode == DImode
12165 ? gen_x86_shift_adj_1
12166 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12167 scratch));
12169 else
12170 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12174 void
12175 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12177 rtx low[2], high[2];
12178 int count;
12179 const int single_width = mode == DImode ? 32 : 64;
12181 if (GET_CODE (operands[2]) == CONST_INT)
12183 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12184 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12186 if (count >= single_width)
12188 emit_move_insn (low[0], high[1]);
12189 ix86_expand_clear (high[0]);
12191 if (count > single_width)
12192 emit_insn ((mode == DImode
12193 ? gen_lshrsi3
12194 : gen_lshrdi3) (low[0], low[0],
12195 GEN_INT (count - single_width)));
12197 else
12199 if (!rtx_equal_p (operands[0], operands[1]))
12200 emit_move_insn (operands[0], operands[1]);
12201 emit_insn ((mode == DImode
12202 ? gen_x86_shrd_1
12203 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12204 emit_insn ((mode == DImode
12205 ? gen_lshrsi3
12206 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12209 else
12211 if (!rtx_equal_p (operands[0], operands[1]))
12212 emit_move_insn (operands[0], operands[1]);
12214 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12216 emit_insn ((mode == DImode
12217 ? gen_x86_shrd_1
12218 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12219 emit_insn ((mode == DImode
12220 ? gen_lshrsi3
12221 : gen_lshrdi3) (high[0], high[0], operands[2]));
12223 /* Heh. By reversing the arguments, we can reuse this pattern. */
12224 if (TARGET_CMOVE && scratch)
12226 ix86_expand_clear (scratch);
12227 emit_insn ((mode == DImode
12228 ? gen_x86_shift_adj_1
12229 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12230 scratch));
12232 else
12233 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12237 /* Helper function for the string operations below. Test whether the VALUE
12238 bits of VARIABLE are clear (i.e. it is suitably aligned); if so, jump to the returned label. */
12239 static rtx
12240 ix86_expand_aligntest (rtx variable, int value)
12242 rtx label = gen_label_rtx ();
12243 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12244 if (GET_MODE (variable) == DImode)
12245 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12246 else
12247 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12248 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12249 1, label);
12250 return label;
12253 /* Decrease COUNTREG by VALUE. */
12254 static void
12255 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12257 if (GET_MODE (countreg) == DImode)
12258 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12259 else
12260 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12263 /* Zero-extend EXP, which may be SImode, into a Pmode register. */
12265 ix86_zero_extend_to_Pmode (rtx exp)
12267 rtx r;
12268 if (GET_MODE (exp) == VOIDmode)
12269 return force_reg (Pmode, exp);
12270 if (GET_MODE (exp) == Pmode)
12271 return copy_to_mode_reg (Pmode, exp);
12272 r = gen_reg_rtx (Pmode);
12273 emit_insn (gen_zero_extendsidi2 (r, exp));
12274 return r;
12277 /* Expand string move (memcpy) operation. Use i386 string operations when
12278 profitable. expand_clrmem contains similar code. */
12280 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12282 rtx srcreg, destreg, countreg, srcexp, destexp;
12283 enum machine_mode counter_mode;
12284 HOST_WIDE_INT align = 0;
12285 unsigned HOST_WIDE_INT count = 0;
12287 if (GET_CODE (align_exp) == CONST_INT)
12288 align = INTVAL (align_exp);
12290 /* Can't use any of this if the user has appropriated esi or edi. */
12291 if (global_regs[4] || global_regs[5])
12292 return 0;
12294 /* This simple hack avoids all inlining code and simplifies code below. */
12295 if (!TARGET_ALIGN_STRINGOPS)
12296 align = 64;
12298 if (GET_CODE (count_exp) == CONST_INT)
12300 count = INTVAL (count_exp);
12301 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12302 return 0;
12305 /* Figure out the proper mode for the counter. For 32-bit targets it is
12306 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
12307 Set count to the number of bytes copied when known at compile time. */
12308 if (!TARGET_64BIT
12309 || GET_MODE (count_exp) == SImode
12310 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12311 counter_mode = SImode;
12312 else
12313 counter_mode = DImode;
12315 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12317 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12318 if (destreg != XEXP (dst, 0))
12319 dst = replace_equiv_address_nv (dst, destreg);
12320 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12321 if (srcreg != XEXP (src, 0))
12322 src = replace_equiv_address_nv (src, srcreg);
12324 /* When optimizing for size, emit a simple rep ; movsb instruction for
12325 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12326 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12327 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12328 count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
12329 but we don't know whether the upper 24 (resp. 56) bits of %ecx will
12330 be known to be zero. The rep; movsb sequence causes higher
12331 register pressure, though, so take that into account. */
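/* Applying the size estimate above (for illustration): count = 11
   needs 11/4 + (11 & 3) = 5 units, within the limit of 6 tested just
   below, so rep; movsb is not used and control falls through to the
   movsl-based copy; count = 30 exceeds the 5 * 4 byte threshold, so
   rep; movsb is emitted instead.  */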
12333 if ((!optimize || optimize_size)
12334 && (count == 0
12335 || ((count & 0x03)
12336 && (!optimize_size
12337 || count > 5 * 4
12338 || (count & 3) + count / 4 > 6))))
12340 emit_insn (gen_cld ());
12341 countreg = ix86_zero_extend_to_Pmode (count_exp);
12342 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12343 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12344 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12345 destexp, srcexp));
12348 /* For constant aligned (or small unaligned) copies use rep movsl
12349 followed by code copying the rest. For PentiumPro ensure 8 byte
12350 alignment to allow rep movsl acceleration. */
12352 else if (count != 0
12353 && (align >= 8
12354 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12355 || optimize_size || count < (unsigned int) 64))
12357 unsigned HOST_WIDE_INT offset = 0;
12358 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12359 rtx srcmem, dstmem;
12361 emit_insn (gen_cld ());
12362 if (count & ~(size - 1))
12364 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12366 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12368 while (offset < (count & ~(size - 1)))
12370 srcmem = adjust_automodify_address_nv (src, movs_mode,
12371 srcreg, offset);
12372 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12373 destreg, offset);
12374 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12375 offset += size;
12378 else
12380 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12381 & (TARGET_64BIT ? -1 : 0x3fffffff));
12382 countreg = copy_to_mode_reg (counter_mode, countreg);
12383 countreg = ix86_zero_extend_to_Pmode (countreg);
12385 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12386 GEN_INT (size == 4 ? 2 : 3));
12387 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12388 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12390 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12391 countreg, destexp, srcexp));
12392 offset = count & ~(size - 1);
12395 if (size == 8 && (count & 0x04))
12397 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12398 offset);
12399 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12400 offset);
12401 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12402 offset += 4;
12404 if (count & 0x02)
12406 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12407 offset);
12408 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12409 offset);
12410 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12411 offset += 2;
12413 if (count & 0x01)
12415 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12416 offset);
12417 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12418 offset);
12419 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12422 /* The generic code based on the glibc implementation:
12423 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12424 allowing accelerated copying there)
12425 - copy the data using rep movsl
12426 - copy the rest. */
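/* A rough C-level sketch of that strategy (illustrative only;
   rep_movsl is shorthand for the rep; movsl string operation, not a
   helper that exists in this file):

       while (n && ((unsigned long) dst & 3))   // align the destination
         { *dst++ = *src++; n--; }
       rep_movsl (&dst, &src, n / 4);           // bulk copy, advancing dst/src
       for (n &= 3; n; n--)                     // copy the remaining tail
         *dst++ = *src++;
*/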
12427 else
12429 rtx countreg2;
12430 rtx label = NULL;
12431 rtx srcmem, dstmem;
12432 int desired_alignment = (TARGET_PENTIUMPRO
12433 && (count == 0 || count >= (unsigned int) 260)
12434 ? 8 : UNITS_PER_WORD);
12435 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12436 dst = change_address (dst, BLKmode, destreg);
12437 src = change_address (src, BLKmode, srcreg);
12439 /* In case we don't know anything about the alignment, default to the
12440 library version, since it is usually equally fast and results in
12441 shorter code.
12443 Also emit a call when we know that the count is large and the call
12444 overhead will not be important. */
12445 if (!TARGET_INLINE_ALL_STRINGOPS
12446 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12447 return 0;
12449 if (TARGET_SINGLE_STRINGOP)
12450 emit_insn (gen_cld ());
12452 countreg2 = gen_reg_rtx (Pmode);
12453 countreg = copy_to_mode_reg (counter_mode, count_exp);
12455 /* We don't use loops to align the destination or to copy parts smaller
12456 than 4 bytes, because gcc is able to optimize such code better (in
12457 the case the destination or the count really is aligned, gcc is often
12458 able to predict the branches) and also it is friendlier to the
12459 hardware branch prediction.
12461 Using loops is beneficial for the generic case, because we can
12462 handle small counts using the loops. Many CPUs (such as Athlon)
12463 have large REP prefix setup costs.
12465 This is quite costly. Maybe we can revisit this decision later or
12466 add some customizability to this code. */
12468 if (count == 0 && align < desired_alignment)
12470 label = gen_label_rtx ();
12471 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12472 LEU, 0, counter_mode, 1, label);
12474 if (align <= 1)
12476 rtx label = ix86_expand_aligntest (destreg, 1);
12477 srcmem = change_address (src, QImode, srcreg);
12478 dstmem = change_address (dst, QImode, destreg);
12479 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12480 ix86_adjust_counter (countreg, 1);
12481 emit_label (label);
12482 LABEL_NUSES (label) = 1;
12484 if (align <= 2)
12486 rtx label = ix86_expand_aligntest (destreg, 2);
12487 srcmem = change_address (src, HImode, srcreg);
12488 dstmem = change_address (dst, HImode, destreg);
12489 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12490 ix86_adjust_counter (countreg, 2);
12491 emit_label (label);
12492 LABEL_NUSES (label) = 1;
12494 if (align <= 4 && desired_alignment > 4)
12496 rtx label = ix86_expand_aligntest (destreg, 4);
12497 srcmem = change_address (src, SImode, srcreg);
12498 dstmem = change_address (dst, SImode, destreg);
12499 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12500 ix86_adjust_counter (countreg, 4);
12501 emit_label (label);
12502 LABEL_NUSES (label) = 1;
12505 if (label && desired_alignment > 4 && !TARGET_64BIT)
12507 emit_label (label);
12508 LABEL_NUSES (label) = 1;
12509 label = NULL_RTX;
12511 if (!TARGET_SINGLE_STRINGOP)
12512 emit_insn (gen_cld ());
12513 if (TARGET_64BIT)
12515 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12516 GEN_INT (3)));
12517 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12519 else
12521 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12522 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12524 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12525 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12526 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12527 countreg2, destexp, srcexp));
12529 if (label)
12531 emit_label (label);
12532 LABEL_NUSES (label) = 1;
12534 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12536 srcmem = change_address (src, SImode, srcreg);
12537 dstmem = change_address (dst, SImode, destreg);
12538 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12540 if ((align <= 4 || count == 0) && TARGET_64BIT)
12542 rtx label = ix86_expand_aligntest (countreg, 4);
12543 srcmem = change_address (src, SImode, srcreg);
12544 dstmem = change_address (dst, SImode, destreg);
12545 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12546 emit_label (label);
12547 LABEL_NUSES (label) = 1;
12549 if (align > 2 && count != 0 && (count & 2))
12551 srcmem = change_address (src, HImode, srcreg);
12552 dstmem = change_address (dst, HImode, destreg);
12553 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12555 if (align <= 2 || count == 0)
12557 rtx label = ix86_expand_aligntest (countreg, 2);
12558 srcmem = change_address (src, HImode, srcreg);
12559 dstmem = change_address (dst, HImode, destreg);
12560 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12561 emit_label (label);
12562 LABEL_NUSES (label) = 1;
12564 if (align > 1 && count != 0 && (count & 1))
12566 srcmem = change_address (src, QImode, srcreg);
12567 dstmem = change_address (dst, QImode, destreg);
12568 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12570 if (align <= 1 || count == 0)
12572 rtx label = ix86_expand_aligntest (countreg, 1);
12573 srcmem = change_address (src, QImode, srcreg);
12574 dstmem = change_address (dst, QImode, destreg);
12575 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12576 emit_label (label);
12577 LABEL_NUSES (label) = 1;
12581 return 1;
12584 /* Expand string clear operation (bzero). Use i386 string operations when
12585 profitable. expand_movmem contains similar code. */
12587 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12589 rtx destreg, zeroreg, countreg, destexp;
12590 enum machine_mode counter_mode;
12591 HOST_WIDE_INT align = 0;
12592 unsigned HOST_WIDE_INT count = 0;
12594 if (GET_CODE (align_exp) == CONST_INT)
12595 align = INTVAL (align_exp);
12597 /* Can't use any of this if the user has appropriated esi. */
12598 if (global_regs[4])
12599 return 0;
12601 /* This simple hack avoids all inlining code and simplifies code below. */
12602 if (!TARGET_ALIGN_STRINGOPS)
12603 align = 32;
12605 if (GET_CODE (count_exp) == CONST_INT)
12607 count = INTVAL (count_exp);
12608 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12609 return 0;
12611 /* Figure out the proper mode for the counter. For 32-bit targets it is
12612 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
12613 Set count to the number of bytes to clear when known at compile time. */
12614 if (!TARGET_64BIT
12615 || GET_MODE (count_exp) == SImode
12616 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12617 counter_mode = SImode;
12618 else
12619 counter_mode = DImode;
12621 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12622 if (destreg != XEXP (dst, 0))
12623 dst = replace_equiv_address_nv (dst, destreg);
12626 /* When optimizing for size, emit a simple rep ; stosb instruction for
12627 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12628 sequence is 7 bytes long, so if optimizing for size and the count is
12629 small enough that some stosl, stosw and stosb instructions without
12630 rep are shorter, fall through to the next if. */
12632 if ((!optimize || optimize_size)
12633 && (count == 0
12634 || ((count & 0x03)
12635 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12637 emit_insn (gen_cld ());
12639 countreg = ix86_zero_extend_to_Pmode (count_exp);
12640 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12641 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12642 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12644 else if (count != 0
12645 && (align >= 8
12646 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12647 || optimize_size || count < (unsigned int) 64))
12649 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12650 unsigned HOST_WIDE_INT offset = 0;
12652 emit_insn (gen_cld ());
12654 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12655 if (count & ~(size - 1))
12657 unsigned HOST_WIDE_INT repcount;
12658 unsigned int max_nonrep;
12660 repcount = count >> (size == 4 ? 2 : 3);
12661 if (!TARGET_64BIT)
12662 repcount &= 0x3fffffff;
12664 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12665 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12666 bytes. In both cases the latter seems to be faster for small
12667 values of N. */
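/* Example of the trade-off, using the byte counts quoted above: when
   clearing 12 bytes with 4-byte stores, repcount is 3, which stays at
   or below max_nonrep, so three individual stosl insns (3 bytes in
   total) are emitted instead of the 7-byte movl/rep/stosl form.  */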
12668 max_nonrep = size == 4 ? 7 : 4;
12669 if (!optimize_size)
12670 switch (ix86_tune)
12672 case PROCESSOR_PENTIUM4:
12673 case PROCESSOR_NOCONA:
12674 max_nonrep = 3;
12675 break;
12676 default:
12677 break;
12680 if (repcount <= max_nonrep)
12681 while (repcount-- > 0)
12683 rtx mem = adjust_automodify_address_nv (dst,
12684 GET_MODE (zeroreg),
12685 destreg, offset);
12686 emit_insn (gen_strset (destreg, mem, zeroreg));
12687 offset += size;
12689 else
12691 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12692 countreg = ix86_zero_extend_to_Pmode (countreg);
12693 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12694 GEN_INT (size == 4 ? 2 : 3));
12695 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12696 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12697 destexp));
12698 offset = count & ~(size - 1);
12701 if (size == 8 && (count & 0x04))
12703 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12704 offset);
12705 emit_insn (gen_strset (destreg, mem,
12706 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12707 offset += 4;
12709 if (count & 0x02)
12711 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12712 offset);
12713 emit_insn (gen_strset (destreg, mem,
12714 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12715 offset += 2;
12717 if (count & 0x01)
12719 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12720 offset);
12721 emit_insn (gen_strset (destreg, mem,
12722 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12725 else
12727 rtx countreg2;
12728 rtx label = NULL;
12729 /* Compute desired alignment of the string operation. */
12730 int desired_alignment = (TARGET_PENTIUMPRO
12731 && (count == 0 || count >= (unsigned int) 260)
12732 ? 8 : UNITS_PER_WORD);
12734 /* In case we don't know anything about the alignment, default to the
12735 library version, since it is usually equally fast and results in
12736 shorter code.
12738 Also emit a call when we know that the count is large and the call
12739 overhead will not be important. */
12740 if (!TARGET_INLINE_ALL_STRINGOPS
12741 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12742 return 0;
12744 if (TARGET_SINGLE_STRINGOP)
12745 emit_insn (gen_cld ());
12747 countreg2 = gen_reg_rtx (Pmode);
12748 countreg = copy_to_mode_reg (counter_mode, count_exp);
12749 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12750 /* Get rid of MEM_OFFSET, it won't be accurate. */
12751 dst = change_address (dst, BLKmode, destreg);
12753 if (count == 0 && align < desired_alignment)
12755 label = gen_label_rtx ();
12756 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12757 LEU, 0, counter_mode, 1, label);
12759 if (align <= 1)
12761 rtx label = ix86_expand_aligntest (destreg, 1);
12762 emit_insn (gen_strset (destreg, dst,
12763 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12764 ix86_adjust_counter (countreg, 1);
12765 emit_label (label);
12766 LABEL_NUSES (label) = 1;
12768 if (align <= 2)
12770 rtx label = ix86_expand_aligntest (destreg, 2);
12771 emit_insn (gen_strset (destreg, dst,
12772 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12773 ix86_adjust_counter (countreg, 2);
12774 emit_label (label);
12775 LABEL_NUSES (label) = 1;
12777 if (align <= 4 && desired_alignment > 4)
12779 rtx label = ix86_expand_aligntest (destreg, 4);
12780 emit_insn (gen_strset (destreg, dst,
12781 (TARGET_64BIT
12782 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12783 : zeroreg)));
12784 ix86_adjust_counter (countreg, 4);
12785 emit_label (label);
12786 LABEL_NUSES (label) = 1;
12789 if (label && desired_alignment > 4 && !TARGET_64BIT)
12791 emit_label (label);
12792 LABEL_NUSES (label) = 1;
12793 label = NULL_RTX;
12796 if (!TARGET_SINGLE_STRINGOP)
12797 emit_insn (gen_cld ());
12798 if (TARGET_64BIT)
12800 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12801 GEN_INT (3)));
12802 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12804 else
12806 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12807 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12809 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12810 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12812 if (label)
12814 emit_label (label);
12815 LABEL_NUSES (label) = 1;
12818 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12819 emit_insn (gen_strset (destreg, dst,
12820 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12821 if (TARGET_64BIT && (align <= 4 || count == 0))
12823 rtx label = ix86_expand_aligntest (countreg, 4);
12824 emit_insn (gen_strset (destreg, dst,
12825 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12826 emit_label (label);
12827 LABEL_NUSES (label) = 1;
12829 if (align > 2 && count != 0 && (count & 2))
12830 emit_insn (gen_strset (destreg, dst,
12831 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12832 if (align <= 2 || count == 0)
12834 rtx label = ix86_expand_aligntest (countreg, 2);
12835 emit_insn (gen_strset (destreg, dst,
12836 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12837 emit_label (label);
12838 LABEL_NUSES (label) = 1;
12840 if (align > 1 && count != 0 && (count & 1))
12841 emit_insn (gen_strset (destreg, dst,
12842 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12843 if (align <= 1 || count == 0)
12845 rtx label = ix86_expand_aligntest (countreg, 1);
12846 emit_insn (gen_strset (destreg, dst,
12847 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12848 emit_label (label);
12849 LABEL_NUSES (label) = 1;
12852 return 1;
12855 /* Expand strlen. */
12857 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12859 rtx addr, scratch1, scratch2, scratch3, scratch4;
12861 /* The generic case of the strlen expander is long. Avoid expanding it
12862 unless TARGET_INLINE_ALL_STRINGOPS. */
12864 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12865 && !TARGET_INLINE_ALL_STRINGOPS
12866 && !optimize_size
12867 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12868 return 0;
12870 addr = force_reg (Pmode, XEXP (src, 0));
12871 scratch1 = gen_reg_rtx (Pmode);
12873 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12874 && !optimize_size)
12876 /* Well it seems that some optimizer does not combine a call like
12877 foo(strlen(bar), strlen(bar));
12878 when the move and the subtraction are done here. It does calculate
12879 the length just once when these instructions are done inside of
12880 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
12881 often used and I use one fewer register for the lifetime of
12882 output_strlen_unroll() this is better. */
12884 emit_move_insn (out, addr);
12886 ix86_expand_strlensi_unroll_1 (out, src, align);
12888 /* strlensi_unroll_1 returns the address of the zero at the end of
12889 the string, like memchr(), so compute the length by subtracting
12890 the start address. */
12891 if (TARGET_64BIT)
12892 emit_insn (gen_subdi3 (out, out, addr));
12893 else
12894 emit_insn (gen_subsi3 (out, out, addr));
12896 else
12898 rtx unspec;
12899 scratch2 = gen_reg_rtx (Pmode);
12900 scratch3 = gen_reg_rtx (Pmode);
12901 scratch4 = force_reg (Pmode, constm1_rtx);
12903 emit_move_insn (scratch3, addr);
12904 eoschar = force_reg (QImode, eoschar);
12906 emit_insn (gen_cld ());
12907 src = replace_equiv_address_nv (src, scratch3);
12909 /* If .md starts supporting :P, this can be done in .md. */
12910 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12911 scratch4), UNSPEC_SCAS);
12912 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12913 if (TARGET_64BIT)
12915 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12916 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12918 else
12920 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12921 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12924 return 1;
12927 /* Expand the appropriate insns for doing strlen if not just doing
12928 repnz; scasb
12930 out = result, initialized with the start address
12931 align_rtx = alignment of the address.
12932 scratch = scratch register, initialized with the start address when
12933 not aligned, otherwise undefined
12935 This is just the body. It needs the initializations mentioned above and
12936 some address computing at the end. These things are done in i386.md. */
12938 static void
12939 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12941 int align;
12942 rtx tmp;
12943 rtx align_2_label = NULL_RTX;
12944 rtx align_3_label = NULL_RTX;
12945 rtx align_4_label = gen_label_rtx ();
12946 rtx end_0_label = gen_label_rtx ();
12947 rtx mem;
12948 rtx tmpreg = gen_reg_rtx (SImode);
12949 rtx scratch = gen_reg_rtx (SImode);
12950 rtx cmp;
12952 align = 0;
12953 if (GET_CODE (align_rtx) == CONST_INT)
12954 align = INTVAL (align_rtx);
12956 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12958 /* Is there a known alignment and is it less than 4? */
12959 if (align < 4)
12961 rtx scratch1 = gen_reg_rtx (Pmode);
12962 emit_move_insn (scratch1, out);
12963 /* Is there a known alignment and is it not 2? */
12964 if (align != 2)
12966 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12967 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12969 /* Leave just the 3 lower bits. */
12970 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12971 NULL_RTX, 0, OPTAB_WIDEN);
12973 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12974 Pmode, 1, align_4_label);
12975 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12976 Pmode, 1, align_2_label);
12977 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12978 Pmode, 1, align_3_label);
12980 else
12982 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12983 check whether it is aligned to a 4-byte boundary. */
12985 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12986 NULL_RTX, 0, OPTAB_WIDEN);
12988 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12989 Pmode, 1, align_4_label);
12992 mem = change_address (src, QImode, out);
12994 /* Now compare the bytes. */
12996 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
12997 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12998 QImode, 1, end_0_label);
13000 /* Increment the address. */
13001 if (TARGET_64BIT)
13002 emit_insn (gen_adddi3 (out, out, const1_rtx));
13003 else
13004 emit_insn (gen_addsi3 (out, out, const1_rtx));
13006 /* Not needed with an alignment of 2. */
13007 if (align != 2)
13009 emit_label (align_2_label);
13011 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13012 end_0_label);
13014 if (TARGET_64BIT)
13015 emit_insn (gen_adddi3 (out, out, const1_rtx));
13016 else
13017 emit_insn (gen_addsi3 (out, out, const1_rtx));
13019 emit_label (align_3_label);
13022 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13023 end_0_label);
13025 if (TARGET_64BIT)
13026 emit_insn (gen_adddi3 (out, out, const1_rtx));
13027 else
13028 emit_insn (gen_addsi3 (out, out, const1_rtx));
13031 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13032 align this loop; that only makes the program bigger and does not help
13033 to speed it up. */
13034 emit_label (align_4_label);
13036 mem = change_address (src, SImode, out);
13037 emit_move_insn (scratch, mem);
13038 if (TARGET_64BIT)
13039 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13040 else
13041 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13043 /* This formula yields a nonzero result iff one of the bytes is zero.
13044 This saves three branches inside the loop and many cycles. */
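/* In C terms the four insns below compute
       (x - 0x01010101) & ~x & 0x80808080
   which is nonzero exactly when some byte of x is zero.  Worked
   example: x = 0xFF00FFFF gives 0xFDFFFEFE & 0x00FF0000 & 0x80808080
   = 0x00800000, flagging the zero byte, while x = 0x61626364 gives
   0x60616263 & 0x9E9D9C9B & 0x80808080 = 0.  */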
13046 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13047 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13048 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13049 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13050 gen_int_mode (0x80808080, SImode)));
13051 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13052 align_4_label);
13054 if (TARGET_CMOVE)
13056 rtx reg = gen_reg_rtx (SImode);
13057 rtx reg2 = gen_reg_rtx (Pmode);
13058 emit_move_insn (reg, tmpreg);
13059 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13061 /* If zero is not in the first two bytes, move two bytes forward. */
13062 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13063 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13064 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13065 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13066 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13067 reg,
13068 tmpreg)));
13069 /* Emit lea manually to avoid clobbering of flags. */
13070 emit_insn (gen_rtx_SET (SImode, reg2,
13071 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13073 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13074 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13075 emit_insn (gen_rtx_SET (VOIDmode, out,
13076 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13077 reg2,
13078 out)));
13081 else
13083 rtx end_2_label = gen_label_rtx ();
13084 /* Is zero in the first two bytes? */
13086 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13087 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13088 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13089 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13090 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13091 pc_rtx);
13092 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13093 JUMP_LABEL (tmp) = end_2_label;
13095 /* Not in the first two. Move two bytes forward. */
13096 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13097 if (TARGET_64BIT)
13098 emit_insn (gen_adddi3 (out, out, const2_rtx));
13099 else
13100 emit_insn (gen_addsi3 (out, out, const2_rtx));
13102 emit_label (end_2_label);
13106 /* Avoid a branch when fixing up the final byte. */
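/* One way to read the sequence: OUT points four bytes past the first
   of the two remaining candidate bytes, and the low byte of TMPREG is
   0x80 exactly when that first candidate is the zero byte.  Doubling
   TMPREG's low byte sets the carry flag in that case, so the
   subtract-with-borrow of 3 removes 4 or 3 and leaves OUT pointing at
   the terminating zero itself.  */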
13107 tmpreg = gen_lowpart (QImode, tmpreg);
13108 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13109 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13110 if (TARGET_64BIT)
13111 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13112 else
13113 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13115 emit_label (end_0_label);
13118 void
13119 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13120 rtx callarg2 ATTRIBUTE_UNUSED,
13121 rtx pop, int sibcall)
13123 rtx use = NULL, call;
13125 if (pop == const0_rtx)
13126 pop = NULL;
13127 gcc_assert (!TARGET_64BIT || !pop);
13129 #if TARGET_MACHO
13130 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13131 fnaddr = machopic_indirect_call_target (fnaddr);
13132 #else
13133 /* Static functions and indirect calls don't need the pic register. */
13134 if (! TARGET_64BIT && flag_pic
13135 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13136 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13137 use_reg (&use, pic_offset_table_rtx);
13139 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13141 rtx al = gen_rtx_REG (QImode, 0);
13142 emit_move_insn (al, callarg2);
13143 use_reg (&use, al);
13145 #endif /* TARGET_MACHO */
13147 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13149 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13150 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13152 if (sibcall && TARGET_64BIT
13153 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13155 rtx addr;
13156 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13157 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13158 emit_move_insn (fnaddr, addr);
13159 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13162 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13163 if (retval)
13164 call = gen_rtx_SET (VOIDmode, retval, call);
13165 if (pop)
13167 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13168 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13169 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13172 call = emit_call_insn (call);
13173 if (use)
13174 CALL_INSN_FUNCTION_USAGE (call) = use;
13178 /* Clear stack slot assignments remembered from previous functions.
13179 This is called from INIT_EXPANDERS once before RTL is emitted for each
13180 function. */
13182 static struct machine_function *
13183 ix86_init_machine_status (void)
13185 struct machine_function *f;
13187 f = ggc_alloc_cleared (sizeof (struct machine_function));
13188 f->use_fast_prologue_epilogue_nregs = -1;
13189 f->tls_descriptor_call_expanded_p = 0;
13191 return f;
13194 /* Return a MEM corresponding to a stack slot with mode MODE.
13195 Allocate a new slot if necessary.
13197 The RTL for a function can have several slots available: N is
13198 which slot to use. */
13201 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13203 struct stack_local_entry *s;
13205 gcc_assert (n < MAX_386_STACK_LOCALS);
13207 for (s = ix86_stack_locals; s; s = s->next)
13208 if (s->mode == mode && s->n == n)
13209 return s->rtl;
13211 s = (struct stack_local_entry *)
13212 ggc_alloc (sizeof (struct stack_local_entry));
13213 s->n = n;
13214 s->mode = mode;
13215 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13217 s->next = ix86_stack_locals;
13218 ix86_stack_locals = s;
13219 return s->rtl;
13222 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13224 static GTY(()) rtx ix86_tls_symbol;
13226 ix86_tls_get_addr (void)
13229 if (!ix86_tls_symbol)
13231 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13232 (TARGET_ANY_GNU_TLS
13233 && !TARGET_64BIT)
13234 ? "___tls_get_addr"
13235 : "__tls_get_addr");
13238 return ix86_tls_symbol;
13241 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13243 static GTY(()) rtx ix86_tls_module_base_symbol;
13245 ix86_tls_module_base (void)
13248 if (!ix86_tls_module_base_symbol)
13250 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13251 "_TLS_MODULE_BASE_");
13252 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13253 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13256 return ix86_tls_module_base_symbol;
13259 /* Calculate the length of the memory address in the instruction
13260 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13263 memory_address_length (rtx addr)
13265 struct ix86_address parts;
13266 rtx base, index, disp;
13267 int len;
13268 int ok;
13270 if (GET_CODE (addr) == PRE_DEC
13271 || GET_CODE (addr) == POST_INC
13272 || GET_CODE (addr) == PRE_MODIFY
13273 || GET_CODE (addr) == POST_MODIFY)
13274 return 0;
13276 ok = ix86_decompose_address (addr, &parts);
13277 gcc_assert (ok);
13279 if (parts.base && GET_CODE (parts.base) == SUBREG)
13280 parts.base = SUBREG_REG (parts.base);
13281 if (parts.index && GET_CODE (parts.index) == SUBREG)
13282 parts.index = SUBREG_REG (parts.index);
13284 base = parts.base;
13285 index = parts.index;
13286 disp = parts.disp;
13287 len = 0;
13289 /* Rule of thumb:
13290 - esp as the base always wants an index,
13291 - ebp as the base always wants a displacement. */
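/* Illustrative lengths under these rules: a plain (reg %ebx) base adds
   no bytes; %ebx + 8 adds one byte for the 8-bit displacement; adding
   an index register costs one more byte for the SIB byte, so
   %ebx + %eax*4 + 8 comes to two.  */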
13293 /* Register Indirect. */
13294 if (base && !index && !disp)
13296 /* esp (for its index) and ebp (for its displacement) need
13297 the two-byte modrm form. */
13298 if (addr == stack_pointer_rtx
13299 || addr == arg_pointer_rtx
13300 || addr == frame_pointer_rtx
13301 || addr == hard_frame_pointer_rtx)
13302 len = 1;
13305 /* Direct Addressing. */
13306 else if (disp && !base && !index)
13307 len = 4;
13309 else
13311 /* Find the length of the displacement constant. */
13312 if (disp)
13314 if (base && satisfies_constraint_K (disp))
13315 len = 1;
13316 else
13317 len = 4;
13319 /* ebp always wants a displacement. */
13320 else if (base == hard_frame_pointer_rtx)
13321 len = 1;
13323 /* An index requires the two-byte modrm form.... */
13324 if (index
13325 /* ...like esp, which always wants an index. */
13326 || base == stack_pointer_rtx
13327 || base == arg_pointer_rtx
13328 || base == frame_pointer_rtx)
13329 len += 1;
13332 return len;
13335 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
13336 is set, expect that the insn has an 8-bit immediate alternative. */
13338 ix86_attr_length_immediate_default (rtx insn, int shortform)
13340 int len = 0;
13341 int i;
13342 extract_insn_cached (insn);
13343 for (i = recog_data.n_operands - 1; i >= 0; --i)
13344 if (CONSTANT_P (recog_data.operand[i]))
13346 gcc_assert (!len);
13347 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13348 len = 1;
13349 else
13351 switch (get_attr_mode (insn))
13353 case MODE_QI:
13354 len+=1;
13355 break;
13356 case MODE_HI:
13357 len+=2;
13358 break;
13359 case MODE_SI:
13360 len+=4;
13361 break;
13362 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13363 case MODE_DI:
13364 len+=4;
13365 break;
13366 default:
13367 fatal_insn ("unknown insn mode", insn);
13371 return len;
13373 /* Compute the default value for the "length_address" attribute. */
13375 ix86_attr_length_address_default (rtx insn)
13377 int i;
13379 if (get_attr_type (insn) == TYPE_LEA)
13381 rtx set = PATTERN (insn);
13383 if (GET_CODE (set) == PARALLEL)
13384 set = XVECEXP (set, 0, 0);
13386 gcc_assert (GET_CODE (set) == SET);
13388 return memory_address_length (SET_SRC (set));
13391 extract_insn_cached (insn);
13392 for (i = recog_data.n_operands - 1; i >= 0; --i)
13393 if (GET_CODE (recog_data.operand[i]) == MEM)
13395 return memory_address_length (XEXP (recog_data.operand[i], 0));
13396 break;
13398 return 0;
13401 /* Return the maximum number of instructions a cpu can issue. */
13403 static int
13404 ix86_issue_rate (void)
13406 switch (ix86_tune)
13408 case PROCESSOR_PENTIUM:
13409 case PROCESSOR_K6:
13410 return 2;
13412 case PROCESSOR_PENTIUMPRO:
13413 case PROCESSOR_PENTIUM4:
13414 case PROCESSOR_ATHLON:
13415 case PROCESSOR_K8:
13416 case PROCESSOR_NOCONA:
13417 case PROCESSOR_GENERIC32:
13418 case PROCESSOR_GENERIC64:
13419 return 3;
13421 default:
13422 return 1;
13426 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
13427 by DEP_INSN and nothing else set by DEP_INSN. */
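/* Typical case: DEP_INSN is a compare that sets only the flags and
   INSN is a conditional jump or setcc that reads nothing but those
   flags; ix86_adjust_cost below uses this to treat such pairs as
   having zero latency on the Pentium.  */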
13429 static int
13430 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13432 rtx set, set2;
13434 /* Simplify the test for uninteresting insns. */
13435 if (insn_type != TYPE_SETCC
13436 && insn_type != TYPE_ICMOV
13437 && insn_type != TYPE_FCMOV
13438 && insn_type != TYPE_IBR)
13439 return 0;
13441 if ((set = single_set (dep_insn)) != 0)
13443 set = SET_DEST (set);
13444 set2 = NULL_RTX;
13446 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13447 && XVECLEN (PATTERN (dep_insn), 0) == 2
13448 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13449 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13451 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13452 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13454 else
13455 return 0;
13457 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13458 return 0;
13460 /* This test is true if the dependent insn reads the flags but
13461 not any other potentially set register. */
13462 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13463 return 0;
13465 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13466 return 0;
13468 return 1;
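/* A typical pair recognized here is a compare feeding a conditional jump:

       cmpl %ebx, %eax    ; dep_insn -- writes only the flags
       jne  .L1           ; insn -- reads only the flags

   On Pentium such a pair can execute together, which is why the caller
   drops the dependence cost to zero when this predicate holds. */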
13471 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13472 address with operands set by DEP_INSN. */
13474 static int
13475 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13477 rtx addr;
13479 if (insn_type == TYPE_LEA
13480 && TARGET_PENTIUM)
13482 addr = PATTERN (insn);
13484 if (GET_CODE (addr) == PARALLEL)
13485 addr = XVECEXP (addr, 0, 0);
13487 gcc_assert (GET_CODE (addr) == SET);
13489 addr = SET_SRC (addr);
13491 else
13493 int i;
13494 extract_insn_cached (insn);
13495 for (i = recog_data.n_operands - 1; i >= 0; --i)
13496 if (GET_CODE (recog_data.operand[i]) == MEM)
13498 addr = XEXP (recog_data.operand[i], 0);
13499 goto found;
13501 return 0;
13502 found:;
13505 return modified_in_p (addr, dep_insn);
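/* Example of the address generation interlock modeled here:

       movl %ecx, %ebx       ; dep_insn writes %ebx
       movl (%ebx), %eax     ; insn needs %ebx to form its address

   The load cannot start its address calculation until %ebx is written,
   so ix86_adjust_cost adds a cycle of latency on Pentium. */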
13508 static int
13509 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13511 enum attr_type insn_type, dep_insn_type;
13512 enum attr_memory memory;
13513 rtx set, set2;
13514 int dep_insn_code_number;
13516 /* Anti and output dependencies have zero cost on all CPUs. */
13517 if (REG_NOTE_KIND (link) != 0)
13518 return 0;
13520 dep_insn_code_number = recog_memoized (dep_insn);
13522 /* If we can't recognize the insns, we can't really do anything. */
13523 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13524 return cost;
13526 insn_type = get_attr_type (insn);
13527 dep_insn_type = get_attr_type (dep_insn);
13529 switch (ix86_tune)
13531 case PROCESSOR_PENTIUM:
13532 /* Address Generation Interlock adds a cycle of latency. */
13533 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13534 cost += 1;
13536 /* ??? Compares pair with jump/setcc. */
13537 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13538 cost = 0;
13540 /* Floating point stores require the value to be ready one cycle earlier. */
13541 if (insn_type == TYPE_FMOV
13542 && get_attr_memory (insn) == MEMORY_STORE
13543 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13544 cost += 1;
13545 break;
13547 case PROCESSOR_PENTIUMPRO:
13548 memory = get_attr_memory (insn);
13550 /* INT->FP conversion is expensive. */
13551 if (get_attr_fp_int_src (dep_insn))
13552 cost += 5;
13554 /* There is one cycle extra latency between an FP op and a store. */
13555 if (insn_type == TYPE_FMOV
13556 && (set = single_set (dep_insn)) != NULL_RTX
13557 && (set2 = single_set (insn)) != NULL_RTX
13558 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13559 && GET_CODE (SET_DEST (set2)) == MEM)
13560 cost += 1;
13562 /* Show the ability of the reorder buffer to hide the latency of a load
13563 by executing it in parallel with the previous instruction, in case the
13564 previous instruction is not needed to compute the address. */
13565 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13566 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13568 /* Claim that moves take one cycle, as the core can issue one load
13569 at a time and the next load can start a cycle later. */
13570 if (dep_insn_type == TYPE_IMOV
13571 || dep_insn_type == TYPE_FMOV)
13572 cost = 1;
13573 else if (cost > 1)
13574 cost--;
13576 break;
13578 case PROCESSOR_K6:
13579 memory = get_attr_memory (insn);
13581 /* The esp dependency is resolved before the instruction is really
13582 finished. */
13583 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13584 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13585 return 1;
13587 /* INT->FP conversion is expensive. */
13588 if (get_attr_fp_int_src (dep_insn))
13589 cost += 5;
13591 /* Show the ability of the reorder buffer to hide the latency of a load
13592 by executing it in parallel with the previous instruction, in case the
13593 previous instruction is not needed to compute the address. */
13594 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13595 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13597 /* Claim that moves take one cycle, as the core can issue one load
13598 at a time and the next load can start a cycle later. */
13599 if (dep_insn_type == TYPE_IMOV
13600 || dep_insn_type == TYPE_FMOV)
13601 cost = 1;
13602 else if (cost > 2)
13603 cost -= 2;
13604 else
13605 cost = 1;
13607 break;
13609 case PROCESSOR_ATHLON:
13610 case PROCESSOR_K8:
13611 case PROCESSOR_GENERIC32:
13612 case PROCESSOR_GENERIC64:
13613 memory = get_attr_memory (insn);
13615 /* Show the ability of the reorder buffer to hide the latency of a load
13616 by executing it in parallel with the previous instruction, in case the
13617 previous instruction is not needed to compute the address. */
13618 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13619 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13621 enum attr_unit unit = get_attr_unit (insn);
13622 int loadcost = 3;
13624 /* Because of the difference between the length of integer and
13625 floating unit pipeline preparation stages, the memory operands
13626 for floating point are cheaper.
13628 ??? For Athlon the difference is most probably 2. */
13629 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13630 loadcost = 3;
13631 else
13632 loadcost = TARGET_ATHLON ? 2 : 0;
13634 if (cost >= loadcost)
13635 cost -= loadcost;
13636 else
13637 cost = 0;
13640 default:
13641 break;
13644 return cost;
13647 /* How many alternative schedules to try. This should be as wide as the
13648 scheduling freedom in the DFA, but no wider. Making this value too
13649 large results in extra work for the scheduler. */
13651 static int
13652 ia32_multipass_dfa_lookahead (void)
13654 if (ix86_tune == PROCESSOR_PENTIUM)
13655 return 2;
13657 if (ix86_tune == PROCESSOR_PENTIUMPRO
13658 || ix86_tune == PROCESSOR_K6)
13659 return 1;
13661 else
13662 return 0;
13666 /* Compute the alignment given to a constant that is being placed in memory.
13667 EXP is the constant and ALIGN is the alignment that the object would
13668 ordinarily have.
13669 The value of this function is used instead of that alignment to align
13670 the object. */
13673 ix86_constant_alignment (tree exp, int align)
13675 if (TREE_CODE (exp) == REAL_CST)
13677 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13678 return 64;
13679 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13680 return 128;
13682 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13683 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13684 return BITS_PER_WORD;
13686 return align;
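/* For example, a DFmode constant emitted to the constant pool would
   ordinarily get only 32-bit alignment on ia32; raising it to 64 bits
   keeps an 8-byte FP load of it from straddling a cache line. */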
13689 /* Compute the alignment for a static variable.
13690 TYPE is the data type, and ALIGN is the alignment that
13691 the object would ordinarily have. The value of this function is used
13692 instead of that alignment to align the object. */
13695 ix86_data_alignment (tree type, int align)
13697 int max_align = optimize_size ? BITS_PER_WORD : 256;
13699 if (AGGREGATE_TYPE_P (type)
13700 && TYPE_SIZE (type)
13701 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13702 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13703 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13704 && align < max_align)
13705 align = max_align;
13707 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
13708 to a 16-byte boundary. */
13709 if (TARGET_64BIT)
13711 if (AGGREGATE_TYPE_P (type)
13712 && TYPE_SIZE (type)
13713 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13714 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13715 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13716 return 128;
13719 if (TREE_CODE (type) == ARRAY_TYPE)
13721 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13722 return 64;
13723 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13724 return 128;
13726 else if (TREE_CODE (type) == COMPLEX_TYPE)
13729 if (TYPE_MODE (type) == DCmode && align < 64)
13730 return 64;
13731 if (TYPE_MODE (type) == XCmode && align < 128)
13732 return 128;
13734 else if ((TREE_CODE (type) == RECORD_TYPE
13735 || TREE_CODE (type) == UNION_TYPE
13736 || TREE_CODE (type) == QUAL_UNION_TYPE)
13737 && TYPE_FIELDS (type))
13739 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13740 return 64;
13741 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13742 return 128;
13744 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13745 || TREE_CODE (type) == INTEGER_TYPE)
13747 if (TYPE_MODE (type) == DFmode && align < 64)
13748 return 64;
13749 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13750 return 128;
13753 return align;
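/* For example, "static char buf[100]" is an 800-bit aggregate, so when
   not optimizing for size its alignment is raised to the 256-bit
   max_align above, allowing block moves of it to use wider aligned
   accesses. */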
13756 /* Compute the alignment for a local variable.
13757 TYPE is the data type, and ALIGN is the alignment that
13758 the object would ordinarily have. The value of this macro is used
13759 instead of that alignment to align the object. */
13762 ix86_local_alignment (tree type, int align)
13764 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
13765 to a 16-byte boundary. */
13766 if (TARGET_64BIT)
13768 if (AGGREGATE_TYPE_P (type)
13769 && TYPE_SIZE (type)
13770 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13771 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13772 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13773 return 128;
13775 if (TREE_CODE (type) == ARRAY_TYPE)
13777 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13778 return 64;
13779 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13780 return 128;
13782 else if (TREE_CODE (type) == COMPLEX_TYPE)
13784 if (TYPE_MODE (type) == DCmode && align < 64)
13785 return 64;
13786 if (TYPE_MODE (type) == XCmode && align < 128)
13787 return 128;
13789 else if ((TREE_CODE (type) == RECORD_TYPE
13790 || TREE_CODE (type) == UNION_TYPE
13791 || TREE_CODE (type) == QUAL_UNION_TYPE)
13792 && TYPE_FIELDS (type))
13794 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13795 return 64;
13796 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13797 return 128;
13799 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13800 || TREE_CODE (type) == INTEGER_TYPE)
13803 if (TYPE_MODE (type) == DFmode && align < 64)
13804 return 64;
13805 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13806 return 128;
13808 return align;
13811 /* Emit RTL insns to initialize the variable parts of a trampoline.
13812 FNADDR is an RTX for the address of the function's pure code.
13813 CXT is an RTX for the static chain value for the function. */
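/* The 32-bit trampoline emitted below is 10 bytes:

       b9 <cxt:4>       movl  $CXT, %ecx
       e9 <disp:4>      jmp   FNADDR     (pc-relative displacement)

   The 64-bit variant loads FNADDR into %r11 (movl or movabs), loads CXT
   into %r10 with movabs, and ends with a "jmp *%r11" (49 ff e3). */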
13814 void
13815 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13817 if (!TARGET_64BIT)
13819 /* Compute offset from the end of the jmp to the target function. */
13820 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13821 plus_constant (tramp, 10),
13822 NULL_RTX, 1, OPTAB_DIRECT);
13823 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13824 gen_int_mode (0xb9, QImode));
13825 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13826 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13827 gen_int_mode (0xe9, QImode));
13828 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13830 else
13832 int offset = 0;
13833 /* Try to load the address using the shorter movl instead of movabs.
13834 We may want to support movq for kernel mode, but the kernel does not use
13835 trampolines at the moment. */
13836 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13838 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13839 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13840 gen_int_mode (0xbb41, HImode));
13841 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13842 gen_lowpart (SImode, fnaddr));
13843 offset += 6;
13845 else
13847 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13848 gen_int_mode (0xbb49, HImode));
13849 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13850 fnaddr);
13851 offset += 10;
13853 /* Load static chain using movabs to r10. */
13854 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13855 gen_int_mode (0xba49, HImode));
13856 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13857 cxt);
13858 offset += 10;
13859 /* Jump to the function via r11. */
13860 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13861 gen_int_mode (0xff49, HImode));
13862 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13863 gen_int_mode (0xe3, QImode));
13864 offset += 3;
13865 gcc_assert (offset <= TRAMPOLINE_SIZE);
13868 #ifdef ENABLE_EXECUTE_STACK
13869 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13870 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13871 #endif
13874 /* Codes for all the SSE/MMX builtins. */
13875 enum ix86_builtins
13877 IX86_BUILTIN_ADDPS,
13878 IX86_BUILTIN_ADDSS,
13879 IX86_BUILTIN_DIVPS,
13880 IX86_BUILTIN_DIVSS,
13881 IX86_BUILTIN_MULPS,
13882 IX86_BUILTIN_MULSS,
13883 IX86_BUILTIN_SUBPS,
13884 IX86_BUILTIN_SUBSS,
13886 IX86_BUILTIN_CMPEQPS,
13887 IX86_BUILTIN_CMPLTPS,
13888 IX86_BUILTIN_CMPLEPS,
13889 IX86_BUILTIN_CMPGTPS,
13890 IX86_BUILTIN_CMPGEPS,
13891 IX86_BUILTIN_CMPNEQPS,
13892 IX86_BUILTIN_CMPNLTPS,
13893 IX86_BUILTIN_CMPNLEPS,
13894 IX86_BUILTIN_CMPNGTPS,
13895 IX86_BUILTIN_CMPNGEPS,
13896 IX86_BUILTIN_CMPORDPS,
13897 IX86_BUILTIN_CMPUNORDPS,
13898 IX86_BUILTIN_CMPEQSS,
13899 IX86_BUILTIN_CMPLTSS,
13900 IX86_BUILTIN_CMPLESS,
13901 IX86_BUILTIN_CMPNEQSS,
13902 IX86_BUILTIN_CMPNLTSS,
13903 IX86_BUILTIN_CMPNLESS,
13904 IX86_BUILTIN_CMPNGTSS,
13905 IX86_BUILTIN_CMPNGESS,
13906 IX86_BUILTIN_CMPORDSS,
13907 IX86_BUILTIN_CMPUNORDSS,
13909 IX86_BUILTIN_COMIEQSS,
13910 IX86_BUILTIN_COMILTSS,
13911 IX86_BUILTIN_COMILESS,
13912 IX86_BUILTIN_COMIGTSS,
13913 IX86_BUILTIN_COMIGESS,
13914 IX86_BUILTIN_COMINEQSS,
13915 IX86_BUILTIN_UCOMIEQSS,
13916 IX86_BUILTIN_UCOMILTSS,
13917 IX86_BUILTIN_UCOMILESS,
13918 IX86_BUILTIN_UCOMIGTSS,
13919 IX86_BUILTIN_UCOMIGESS,
13920 IX86_BUILTIN_UCOMINEQSS,
13922 IX86_BUILTIN_CVTPI2PS,
13923 IX86_BUILTIN_CVTPS2PI,
13924 IX86_BUILTIN_CVTSI2SS,
13925 IX86_BUILTIN_CVTSI642SS,
13926 IX86_BUILTIN_CVTSS2SI,
13927 IX86_BUILTIN_CVTSS2SI64,
13928 IX86_BUILTIN_CVTTPS2PI,
13929 IX86_BUILTIN_CVTTSS2SI,
13930 IX86_BUILTIN_CVTTSS2SI64,
13932 IX86_BUILTIN_MAXPS,
13933 IX86_BUILTIN_MAXSS,
13934 IX86_BUILTIN_MINPS,
13935 IX86_BUILTIN_MINSS,
13937 IX86_BUILTIN_LOADUPS,
13938 IX86_BUILTIN_STOREUPS,
13939 IX86_BUILTIN_MOVSS,
13941 IX86_BUILTIN_MOVHLPS,
13942 IX86_BUILTIN_MOVLHPS,
13943 IX86_BUILTIN_LOADHPS,
13944 IX86_BUILTIN_LOADLPS,
13945 IX86_BUILTIN_STOREHPS,
13946 IX86_BUILTIN_STORELPS,
13948 IX86_BUILTIN_MASKMOVQ,
13949 IX86_BUILTIN_MOVMSKPS,
13950 IX86_BUILTIN_PMOVMSKB,
13952 IX86_BUILTIN_MOVNTPS,
13953 IX86_BUILTIN_MOVNTQ,
13955 IX86_BUILTIN_LOADDQU,
13956 IX86_BUILTIN_STOREDQU,
13958 IX86_BUILTIN_PACKSSWB,
13959 IX86_BUILTIN_PACKSSDW,
13960 IX86_BUILTIN_PACKUSWB,
13962 IX86_BUILTIN_PADDB,
13963 IX86_BUILTIN_PADDW,
13964 IX86_BUILTIN_PADDD,
13965 IX86_BUILTIN_PADDQ,
13966 IX86_BUILTIN_PADDSB,
13967 IX86_BUILTIN_PADDSW,
13968 IX86_BUILTIN_PADDUSB,
13969 IX86_BUILTIN_PADDUSW,
13970 IX86_BUILTIN_PSUBB,
13971 IX86_BUILTIN_PSUBW,
13972 IX86_BUILTIN_PSUBD,
13973 IX86_BUILTIN_PSUBQ,
13974 IX86_BUILTIN_PSUBSB,
13975 IX86_BUILTIN_PSUBSW,
13976 IX86_BUILTIN_PSUBUSB,
13977 IX86_BUILTIN_PSUBUSW,
13979 IX86_BUILTIN_PAND,
13980 IX86_BUILTIN_PANDN,
13981 IX86_BUILTIN_POR,
13982 IX86_BUILTIN_PXOR,
13984 IX86_BUILTIN_PAVGB,
13985 IX86_BUILTIN_PAVGW,
13987 IX86_BUILTIN_PCMPEQB,
13988 IX86_BUILTIN_PCMPEQW,
13989 IX86_BUILTIN_PCMPEQD,
13990 IX86_BUILTIN_PCMPGTB,
13991 IX86_BUILTIN_PCMPGTW,
13992 IX86_BUILTIN_PCMPGTD,
13994 IX86_BUILTIN_PMADDWD,
13996 IX86_BUILTIN_PMAXSW,
13997 IX86_BUILTIN_PMAXUB,
13998 IX86_BUILTIN_PMINSW,
13999 IX86_BUILTIN_PMINUB,
14001 IX86_BUILTIN_PMULHUW,
14002 IX86_BUILTIN_PMULHW,
14003 IX86_BUILTIN_PMULLW,
14005 IX86_BUILTIN_PSADBW,
14006 IX86_BUILTIN_PSHUFW,
14008 IX86_BUILTIN_PSLLW,
14009 IX86_BUILTIN_PSLLD,
14010 IX86_BUILTIN_PSLLQ,
14011 IX86_BUILTIN_PSRAW,
14012 IX86_BUILTIN_PSRAD,
14013 IX86_BUILTIN_PSRLW,
14014 IX86_BUILTIN_PSRLD,
14015 IX86_BUILTIN_PSRLQ,
14016 IX86_BUILTIN_PSLLWI,
14017 IX86_BUILTIN_PSLLDI,
14018 IX86_BUILTIN_PSLLQI,
14019 IX86_BUILTIN_PSRAWI,
14020 IX86_BUILTIN_PSRADI,
14021 IX86_BUILTIN_PSRLWI,
14022 IX86_BUILTIN_PSRLDI,
14023 IX86_BUILTIN_PSRLQI,
14025 IX86_BUILTIN_PUNPCKHBW,
14026 IX86_BUILTIN_PUNPCKHWD,
14027 IX86_BUILTIN_PUNPCKHDQ,
14028 IX86_BUILTIN_PUNPCKLBW,
14029 IX86_BUILTIN_PUNPCKLWD,
14030 IX86_BUILTIN_PUNPCKLDQ,
14032 IX86_BUILTIN_SHUFPS,
14034 IX86_BUILTIN_RCPPS,
14035 IX86_BUILTIN_RCPSS,
14036 IX86_BUILTIN_RSQRTPS,
14037 IX86_BUILTIN_RSQRTSS,
14038 IX86_BUILTIN_SQRTPS,
14039 IX86_BUILTIN_SQRTSS,
14041 IX86_BUILTIN_UNPCKHPS,
14042 IX86_BUILTIN_UNPCKLPS,
14044 IX86_BUILTIN_ANDPS,
14045 IX86_BUILTIN_ANDNPS,
14046 IX86_BUILTIN_ORPS,
14047 IX86_BUILTIN_XORPS,
14049 IX86_BUILTIN_EMMS,
14050 IX86_BUILTIN_LDMXCSR,
14051 IX86_BUILTIN_STMXCSR,
14052 IX86_BUILTIN_SFENCE,
14054 /* 3DNow! Original */
14055 IX86_BUILTIN_FEMMS,
14056 IX86_BUILTIN_PAVGUSB,
14057 IX86_BUILTIN_PF2ID,
14058 IX86_BUILTIN_PFACC,
14059 IX86_BUILTIN_PFADD,
14060 IX86_BUILTIN_PFCMPEQ,
14061 IX86_BUILTIN_PFCMPGE,
14062 IX86_BUILTIN_PFCMPGT,
14063 IX86_BUILTIN_PFMAX,
14064 IX86_BUILTIN_PFMIN,
14065 IX86_BUILTIN_PFMUL,
14066 IX86_BUILTIN_PFRCP,
14067 IX86_BUILTIN_PFRCPIT1,
14068 IX86_BUILTIN_PFRCPIT2,
14069 IX86_BUILTIN_PFRSQIT1,
14070 IX86_BUILTIN_PFRSQRT,
14071 IX86_BUILTIN_PFSUB,
14072 IX86_BUILTIN_PFSUBR,
14073 IX86_BUILTIN_PI2FD,
14074 IX86_BUILTIN_PMULHRW,
14076 /* 3DNow! Athlon Extensions */
14077 IX86_BUILTIN_PF2IW,
14078 IX86_BUILTIN_PFNACC,
14079 IX86_BUILTIN_PFPNACC,
14080 IX86_BUILTIN_PI2FW,
14081 IX86_BUILTIN_PSWAPDSI,
14082 IX86_BUILTIN_PSWAPDSF,
14084 /* SSE2 */
14085 IX86_BUILTIN_ADDPD,
14086 IX86_BUILTIN_ADDSD,
14087 IX86_BUILTIN_DIVPD,
14088 IX86_BUILTIN_DIVSD,
14089 IX86_BUILTIN_MULPD,
14090 IX86_BUILTIN_MULSD,
14091 IX86_BUILTIN_SUBPD,
14092 IX86_BUILTIN_SUBSD,
14094 IX86_BUILTIN_CMPEQPD,
14095 IX86_BUILTIN_CMPLTPD,
14096 IX86_BUILTIN_CMPLEPD,
14097 IX86_BUILTIN_CMPGTPD,
14098 IX86_BUILTIN_CMPGEPD,
14099 IX86_BUILTIN_CMPNEQPD,
14100 IX86_BUILTIN_CMPNLTPD,
14101 IX86_BUILTIN_CMPNLEPD,
14102 IX86_BUILTIN_CMPNGTPD,
14103 IX86_BUILTIN_CMPNGEPD,
14104 IX86_BUILTIN_CMPORDPD,
14105 IX86_BUILTIN_CMPUNORDPD,
14106 IX86_BUILTIN_CMPNEPD,
14107 IX86_BUILTIN_CMPEQSD,
14108 IX86_BUILTIN_CMPLTSD,
14109 IX86_BUILTIN_CMPLESD,
14110 IX86_BUILTIN_CMPNEQSD,
14111 IX86_BUILTIN_CMPNLTSD,
14112 IX86_BUILTIN_CMPNLESD,
14113 IX86_BUILTIN_CMPORDSD,
14114 IX86_BUILTIN_CMPUNORDSD,
14115 IX86_BUILTIN_CMPNESD,
14117 IX86_BUILTIN_COMIEQSD,
14118 IX86_BUILTIN_COMILTSD,
14119 IX86_BUILTIN_COMILESD,
14120 IX86_BUILTIN_COMIGTSD,
14121 IX86_BUILTIN_COMIGESD,
14122 IX86_BUILTIN_COMINEQSD,
14123 IX86_BUILTIN_UCOMIEQSD,
14124 IX86_BUILTIN_UCOMILTSD,
14125 IX86_BUILTIN_UCOMILESD,
14126 IX86_BUILTIN_UCOMIGTSD,
14127 IX86_BUILTIN_UCOMIGESD,
14128 IX86_BUILTIN_UCOMINEQSD,
14130 IX86_BUILTIN_MAXPD,
14131 IX86_BUILTIN_MAXSD,
14132 IX86_BUILTIN_MINPD,
14133 IX86_BUILTIN_MINSD,
14135 IX86_BUILTIN_ANDPD,
14136 IX86_BUILTIN_ANDNPD,
14137 IX86_BUILTIN_ORPD,
14138 IX86_BUILTIN_XORPD,
14140 IX86_BUILTIN_SQRTPD,
14141 IX86_BUILTIN_SQRTSD,
14143 IX86_BUILTIN_UNPCKHPD,
14144 IX86_BUILTIN_UNPCKLPD,
14146 IX86_BUILTIN_SHUFPD,
14148 IX86_BUILTIN_LOADUPD,
14149 IX86_BUILTIN_STOREUPD,
14150 IX86_BUILTIN_MOVSD,
14152 IX86_BUILTIN_LOADHPD,
14153 IX86_BUILTIN_LOADLPD,
14155 IX86_BUILTIN_CVTDQ2PD,
14156 IX86_BUILTIN_CVTDQ2PS,
14158 IX86_BUILTIN_CVTPD2DQ,
14159 IX86_BUILTIN_CVTPD2PI,
14160 IX86_BUILTIN_CVTPD2PS,
14161 IX86_BUILTIN_CVTTPD2DQ,
14162 IX86_BUILTIN_CVTTPD2PI,
14164 IX86_BUILTIN_CVTPI2PD,
14165 IX86_BUILTIN_CVTSI2SD,
14166 IX86_BUILTIN_CVTSI642SD,
14168 IX86_BUILTIN_CVTSD2SI,
14169 IX86_BUILTIN_CVTSD2SI64,
14170 IX86_BUILTIN_CVTSD2SS,
14171 IX86_BUILTIN_CVTSS2SD,
14172 IX86_BUILTIN_CVTTSD2SI,
14173 IX86_BUILTIN_CVTTSD2SI64,
14175 IX86_BUILTIN_CVTPS2DQ,
14176 IX86_BUILTIN_CVTPS2PD,
14177 IX86_BUILTIN_CVTTPS2DQ,
14179 IX86_BUILTIN_MOVNTI,
14180 IX86_BUILTIN_MOVNTPD,
14181 IX86_BUILTIN_MOVNTDQ,
14183 /* SSE2 MMX */
14184 IX86_BUILTIN_MASKMOVDQU,
14185 IX86_BUILTIN_MOVMSKPD,
14186 IX86_BUILTIN_PMOVMSKB128,
14188 IX86_BUILTIN_PACKSSWB128,
14189 IX86_BUILTIN_PACKSSDW128,
14190 IX86_BUILTIN_PACKUSWB128,
14192 IX86_BUILTIN_PADDB128,
14193 IX86_BUILTIN_PADDW128,
14194 IX86_BUILTIN_PADDD128,
14195 IX86_BUILTIN_PADDQ128,
14196 IX86_BUILTIN_PADDSB128,
14197 IX86_BUILTIN_PADDSW128,
14198 IX86_BUILTIN_PADDUSB128,
14199 IX86_BUILTIN_PADDUSW128,
14200 IX86_BUILTIN_PSUBB128,
14201 IX86_BUILTIN_PSUBW128,
14202 IX86_BUILTIN_PSUBD128,
14203 IX86_BUILTIN_PSUBQ128,
14204 IX86_BUILTIN_PSUBSB128,
14205 IX86_BUILTIN_PSUBSW128,
14206 IX86_BUILTIN_PSUBUSB128,
14207 IX86_BUILTIN_PSUBUSW128,
14209 IX86_BUILTIN_PAND128,
14210 IX86_BUILTIN_PANDN128,
14211 IX86_BUILTIN_POR128,
14212 IX86_BUILTIN_PXOR128,
14214 IX86_BUILTIN_PAVGB128,
14215 IX86_BUILTIN_PAVGW128,
14217 IX86_BUILTIN_PCMPEQB128,
14218 IX86_BUILTIN_PCMPEQW128,
14219 IX86_BUILTIN_PCMPEQD128,
14220 IX86_BUILTIN_PCMPGTB128,
14221 IX86_BUILTIN_PCMPGTW128,
14222 IX86_BUILTIN_PCMPGTD128,
14224 IX86_BUILTIN_PMADDWD128,
14226 IX86_BUILTIN_PMAXSW128,
14227 IX86_BUILTIN_PMAXUB128,
14228 IX86_BUILTIN_PMINSW128,
14229 IX86_BUILTIN_PMINUB128,
14231 IX86_BUILTIN_PMULUDQ,
14232 IX86_BUILTIN_PMULUDQ128,
14233 IX86_BUILTIN_PMULHUW128,
14234 IX86_BUILTIN_PMULHW128,
14235 IX86_BUILTIN_PMULLW128,
14237 IX86_BUILTIN_PSADBW128,
14238 IX86_BUILTIN_PSHUFHW,
14239 IX86_BUILTIN_PSHUFLW,
14240 IX86_BUILTIN_PSHUFD,
14242 IX86_BUILTIN_PSLLW128,
14243 IX86_BUILTIN_PSLLD128,
14244 IX86_BUILTIN_PSLLQ128,
14245 IX86_BUILTIN_PSRAW128,
14246 IX86_BUILTIN_PSRAD128,
14247 IX86_BUILTIN_PSRLW128,
14248 IX86_BUILTIN_PSRLD128,
14249 IX86_BUILTIN_PSRLQ128,
14250 IX86_BUILTIN_PSLLDQI128,
14251 IX86_BUILTIN_PSLLWI128,
14252 IX86_BUILTIN_PSLLDI128,
14253 IX86_BUILTIN_PSLLQI128,
14254 IX86_BUILTIN_PSRAWI128,
14255 IX86_BUILTIN_PSRADI128,
14256 IX86_BUILTIN_PSRLDQI128,
14257 IX86_BUILTIN_PSRLWI128,
14258 IX86_BUILTIN_PSRLDI128,
14259 IX86_BUILTIN_PSRLQI128,
14261 IX86_BUILTIN_PUNPCKHBW128,
14262 IX86_BUILTIN_PUNPCKHWD128,
14263 IX86_BUILTIN_PUNPCKHDQ128,
14264 IX86_BUILTIN_PUNPCKHQDQ128,
14265 IX86_BUILTIN_PUNPCKLBW128,
14266 IX86_BUILTIN_PUNPCKLWD128,
14267 IX86_BUILTIN_PUNPCKLDQ128,
14268 IX86_BUILTIN_PUNPCKLQDQ128,
14270 IX86_BUILTIN_CLFLUSH,
14271 IX86_BUILTIN_MFENCE,
14272 IX86_BUILTIN_LFENCE,
14274 /* Prescott New Instructions. */
14275 IX86_BUILTIN_ADDSUBPS,
14276 IX86_BUILTIN_HADDPS,
14277 IX86_BUILTIN_HSUBPS,
14278 IX86_BUILTIN_MOVSHDUP,
14279 IX86_BUILTIN_MOVSLDUP,
14280 IX86_BUILTIN_ADDSUBPD,
14281 IX86_BUILTIN_HADDPD,
14282 IX86_BUILTIN_HSUBPD,
14283 IX86_BUILTIN_LDDQU,
14285 IX86_BUILTIN_MONITOR,
14286 IX86_BUILTIN_MWAIT,
14288 IX86_BUILTIN_VEC_INIT_V2SI,
14289 IX86_BUILTIN_VEC_INIT_V4HI,
14290 IX86_BUILTIN_VEC_INIT_V8QI,
14291 IX86_BUILTIN_VEC_EXT_V2DF,
14292 IX86_BUILTIN_VEC_EXT_V2DI,
14293 IX86_BUILTIN_VEC_EXT_V4SF,
14294 IX86_BUILTIN_VEC_EXT_V4SI,
14295 IX86_BUILTIN_VEC_EXT_V8HI,
14296 IX86_BUILTIN_VEC_EXT_V2SI,
14297 IX86_BUILTIN_VEC_EXT_V4HI,
14298 IX86_BUILTIN_VEC_SET_V8HI,
14299 IX86_BUILTIN_VEC_SET_V4HI,
14301 /* SSE2 ABI functions. */
14302 IX86_BUILTIN_SSE2_ACOS,
14303 IX86_BUILTIN_SSE2_ACOSF,
14304 IX86_BUILTIN_SSE2_ASIN,
14305 IX86_BUILTIN_SSE2_ASINF,
14306 IX86_BUILTIN_SSE2_ATAN,
14307 IX86_BUILTIN_SSE2_ATANF,
14308 IX86_BUILTIN_SSE2_ATAN2,
14309 IX86_BUILTIN_SSE2_ATAN2F,
14310 IX86_BUILTIN_SSE2_COS,
14311 IX86_BUILTIN_SSE2_COSF,
14312 IX86_BUILTIN_SSE2_EXP,
14313 IX86_BUILTIN_SSE2_EXPF,
14314 IX86_BUILTIN_SSE2_LOG10,
14315 IX86_BUILTIN_SSE2_LOG10F,
14316 IX86_BUILTIN_SSE2_LOG,
14317 IX86_BUILTIN_SSE2_LOGF,
14318 IX86_BUILTIN_SSE2_SIN,
14319 IX86_BUILTIN_SSE2_SINF,
14320 IX86_BUILTIN_SSE2_TAN,
14321 IX86_BUILTIN_SSE2_TANF,
14323 IX86_BUILTIN_MAX
14326 #define def_builtin(MASK, NAME, TYPE, CODE) \
14327 do { \
14328 if ((MASK) & target_flags \
14329 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14330 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14331 NULL, NULL_TREE); \
14332 } while (0)
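/* For illustration, a builtin could be registered by hand as

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   using one of the function type nodes constructed further below; the
   registration is skipped at run time when the required ISA bit is not
   set in target_flags (and, for MASK_64BIT builtins, when not compiling
   for 64-bit). */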
14334 /* Bits for builtin_description.flag. */
14336 /* Set when we don't support the comparison natively, and should
14337 swap the comparison operands in order to support it. */
14338 #define BUILTIN_DESC_SWAP_OPERANDS 1
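/* For instance, SSE has no native "compare greater than" encoding, so
   __builtin_ia32_cmpgtps below is described with comparison code LT and
   this flag set: the expander swaps the two operands and emits cmpltps. */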
14340 struct builtin_description
14342 const unsigned int mask;
14343 const enum insn_code icode;
14344 const char *const name;
14345 const enum ix86_builtins code;
14346 const enum rtx_code comparison;
14347 const unsigned int flag;
14350 static const struct builtin_description bdesc_comi[] =
14352 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14353 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14354 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14355 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14356 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14357 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14358 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14359 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14360 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14361 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14362 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14363 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14364 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14365 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14366 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14367 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14368 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14369 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14370 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14371 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14372 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14373 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14374 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14375 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14378 static const struct builtin_description bdesc_2arg[] =
14380 /* SSE */
14381 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14382 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14383 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14384 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14385 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14386 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14387 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14388 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14390 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14391 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14392 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14393 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14394 BUILTIN_DESC_SWAP_OPERANDS },
14395 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14396 BUILTIN_DESC_SWAP_OPERANDS },
14397 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14398 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14399 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14400 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14401 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14402 BUILTIN_DESC_SWAP_OPERANDS },
14403 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14404 BUILTIN_DESC_SWAP_OPERANDS },
14405 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14406 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14407 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14408 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14409 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14410 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14411 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14412 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14413 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14414 BUILTIN_DESC_SWAP_OPERANDS },
14415 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14416 BUILTIN_DESC_SWAP_OPERANDS },
14417 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14419 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14420 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14421 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14422 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14424 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14425 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14426 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14427 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14429 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14430 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14431 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14432 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14433 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14435 /* MMX */
14436 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14437 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14438 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14439 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14440 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14441 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14442 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14443 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14445 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14446 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14447 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14448 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14449 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14450 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14451 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14452 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14454 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14455 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14456 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14458 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14459 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14460 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14461 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14463 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14464 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14466 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14467 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14468 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14469 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14470 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14471 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14473 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14474 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14475 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14476 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14478 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14479 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14480 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14481 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14482 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14483 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14485 /* Special. */
14486 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14487 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14488 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14490 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14491 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14492 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14494 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14495 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14496 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14497 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14498 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14499 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14501 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14502 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14503 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14504 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14505 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14506 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14508 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14509 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14510 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14511 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14513 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14514 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14516 /* SSE2 */
14517 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14518 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14519 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14520 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14521 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14522 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14523 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14524 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14526 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14527 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14528 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14529 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14530 BUILTIN_DESC_SWAP_OPERANDS },
14531 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14532 BUILTIN_DESC_SWAP_OPERANDS },
14533 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14534 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14535 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14536 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14537 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14538 BUILTIN_DESC_SWAP_OPERANDS },
14539 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14540 BUILTIN_DESC_SWAP_OPERANDS },
14541 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14542 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14543 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14544 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14545 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14546 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14547 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14548 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14549 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14551 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14552 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14553 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14554 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14556 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14557 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14558 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14559 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14561 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14562 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14563 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14565 /* SSE2 MMX */
14566 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14567 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14568 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14569 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14570 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14571 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14572 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14573 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14575 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14576 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14577 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14578 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14579 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14580 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14581 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14582 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14584 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14585 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14587 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14588 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14589 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14590 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14592 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14595 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14597 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14598 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14599 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14600 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14602 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14603 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14604 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14605 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14607 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14608 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14609 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14610 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14611 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14612 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14613 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14614 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14616 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14617 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14618 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14620 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14621 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14623 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14624 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14626 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14627 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14628 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14630 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14631 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14632 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14634 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14635 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14637 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14639 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14640 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14641 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14642 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14644 /* SSE3 MMX */
14645 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14646 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14647 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14648 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14649 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14650 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14653 static const struct builtin_description bdesc_1arg[] =
14655 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14656 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14658 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14659 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14660 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14662 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14663 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14664 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14665 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14666 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14667 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14669 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14670 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14672 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14674 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14675 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14677 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14678 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14679 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14680 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14681 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14683 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14685 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14686 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14687 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14688 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14690 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14691 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14692 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14694 /* SSE3 */
14695 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14696 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14699 static void
14700 ix86_init_builtins (void)
14702 if (TARGET_MMX)
14703 ix86_init_mmx_sse_builtins ();
14704 if (TARGET_SSE2)
14705 ix86_init_sse_abi_builtins ();
14708 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14709 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14710 builtins. */
14711 static void
14712 ix86_init_mmx_sse_builtins (void)
14714 const struct builtin_description * d;
14715 size_t i;
14717 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14718 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14719 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14720 tree V2DI_type_node
14721 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14722 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14723 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14724 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14725 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14726 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14727 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14729 tree pchar_type_node = build_pointer_type (char_type_node);
14730 tree pcchar_type_node = build_pointer_type (
14731 build_type_variant (char_type_node, 1, 0));
14732 tree pfloat_type_node = build_pointer_type (float_type_node);
14733 tree pcfloat_type_node = build_pointer_type (
14734 build_type_variant (float_type_node, 1, 0));
14735 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14736 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14737 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14739 /* Comparisons. */
14740 tree int_ftype_v4sf_v4sf
14741 = build_function_type_list (integer_type_node,
14742 V4SF_type_node, V4SF_type_node, NULL_TREE);
14743 tree v4si_ftype_v4sf_v4sf
14744 = build_function_type_list (V4SI_type_node,
14745 V4SF_type_node, V4SF_type_node, NULL_TREE);
14746 /* MMX/SSE/integer conversions. */
14747 tree int_ftype_v4sf
14748 = build_function_type_list (integer_type_node,
14749 V4SF_type_node, NULL_TREE);
14750 tree int64_ftype_v4sf
14751 = build_function_type_list (long_long_integer_type_node,
14752 V4SF_type_node, NULL_TREE);
14753 tree int_ftype_v8qi
14754 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14755 tree v4sf_ftype_v4sf_int
14756 = build_function_type_list (V4SF_type_node,
14757 V4SF_type_node, integer_type_node, NULL_TREE);
14758 tree v4sf_ftype_v4sf_int64
14759 = build_function_type_list (V4SF_type_node,
14760 V4SF_type_node, long_long_integer_type_node,
14761 NULL_TREE);
14762 tree v4sf_ftype_v4sf_v2si
14763 = build_function_type_list (V4SF_type_node,
14764 V4SF_type_node, V2SI_type_node, NULL_TREE);
14766 /* Miscellaneous. */
14767 tree v8qi_ftype_v4hi_v4hi
14768 = build_function_type_list (V8QI_type_node,
14769 V4HI_type_node, V4HI_type_node, NULL_TREE);
14770 tree v4hi_ftype_v2si_v2si
14771 = build_function_type_list (V4HI_type_node,
14772 V2SI_type_node, V2SI_type_node, NULL_TREE);
14773 tree v4sf_ftype_v4sf_v4sf_int
14774 = build_function_type_list (V4SF_type_node,
14775 V4SF_type_node, V4SF_type_node,
14776 integer_type_node, NULL_TREE);
14777 tree v2si_ftype_v4hi_v4hi
14778 = build_function_type_list (V2SI_type_node,
14779 V4HI_type_node, V4HI_type_node, NULL_TREE);
14780 tree v4hi_ftype_v4hi_int
14781 = build_function_type_list (V4HI_type_node,
14782 V4HI_type_node, integer_type_node, NULL_TREE);
14783 tree v4hi_ftype_v4hi_di
14784 = build_function_type_list (V4HI_type_node,
14785 V4HI_type_node, long_long_unsigned_type_node,
14786 NULL_TREE);
14787 tree v2si_ftype_v2si_di
14788 = build_function_type_list (V2SI_type_node,
14789 V2SI_type_node, long_long_unsigned_type_node,
14790 NULL_TREE);
14791 tree void_ftype_void
14792 = build_function_type (void_type_node, void_list_node);
14793 tree void_ftype_unsigned
14794 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14795 tree void_ftype_unsigned_unsigned
14796 = build_function_type_list (void_type_node, unsigned_type_node,
14797 unsigned_type_node, NULL_TREE);
14798 tree void_ftype_pcvoid_unsigned_unsigned
14799 = build_function_type_list (void_type_node, const_ptr_type_node,
14800 unsigned_type_node, unsigned_type_node,
14801 NULL_TREE);
14802 tree unsigned_ftype_void
14803 = build_function_type (unsigned_type_node, void_list_node);
14804 tree v2si_ftype_v4sf
14805 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14806 /* Loads/stores. */
14807 tree void_ftype_v8qi_v8qi_pchar
14808 = build_function_type_list (void_type_node,
14809 V8QI_type_node, V8QI_type_node,
14810 pchar_type_node, NULL_TREE);
14811 tree v4sf_ftype_pcfloat
14812 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14813 /* @@@ the type is bogus */
14814 tree v4sf_ftype_v4sf_pv2si
14815 = build_function_type_list (V4SF_type_node,
14816 V4SF_type_node, pv2si_type_node, NULL_TREE);
14817 tree void_ftype_pv2si_v4sf
14818 = build_function_type_list (void_type_node,
14819 pv2si_type_node, V4SF_type_node, NULL_TREE);
14820 tree void_ftype_pfloat_v4sf
14821 = build_function_type_list (void_type_node,
14822 pfloat_type_node, V4SF_type_node, NULL_TREE);
14823 tree void_ftype_pdi_di
14824 = build_function_type_list (void_type_node,
14825 pdi_type_node, long_long_unsigned_type_node,
14826 NULL_TREE);
14827 tree void_ftype_pv2di_v2di
14828 = build_function_type_list (void_type_node,
14829 pv2di_type_node, V2DI_type_node, NULL_TREE);
14830 /* Normal vector unops. */
14831 tree v4sf_ftype_v4sf
14832 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14834 /* Normal vector binops. */
14835 tree v4sf_ftype_v4sf_v4sf
14836 = build_function_type_list (V4SF_type_node,
14837 V4SF_type_node, V4SF_type_node, NULL_TREE);
14838 tree v8qi_ftype_v8qi_v8qi
14839 = build_function_type_list (V8QI_type_node,
14840 V8QI_type_node, V8QI_type_node, NULL_TREE);
14841 tree v4hi_ftype_v4hi_v4hi
14842 = build_function_type_list (V4HI_type_node,
14843 V4HI_type_node, V4HI_type_node, NULL_TREE);
14844 tree v2si_ftype_v2si_v2si
14845 = build_function_type_list (V2SI_type_node,
14846 V2SI_type_node, V2SI_type_node, NULL_TREE);
14847 tree di_ftype_di_di
14848 = build_function_type_list (long_long_unsigned_type_node,
14849 long_long_unsigned_type_node,
14850 long_long_unsigned_type_node, NULL_TREE);
14852 tree v2si_ftype_v2sf
14853 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14854 tree v2sf_ftype_v2si
14855 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14856 tree v2si_ftype_v2si
14857 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14858 tree v2sf_ftype_v2sf
14859 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14860 tree v2sf_ftype_v2sf_v2sf
14861 = build_function_type_list (V2SF_type_node,
14862 V2SF_type_node, V2SF_type_node, NULL_TREE);
14863 tree v2si_ftype_v2sf_v2sf
14864 = build_function_type_list (V2SI_type_node,
14865 V2SF_type_node, V2SF_type_node, NULL_TREE);
14866 tree pint_type_node = build_pointer_type (integer_type_node);
14867 tree pdouble_type_node = build_pointer_type (double_type_node);
14868 tree pcdouble_type_node = build_pointer_type (
14869 build_type_variant (double_type_node, 1, 0));
14870 tree int_ftype_v2df_v2df
14871 = build_function_type_list (integer_type_node,
14872 V2DF_type_node, V2DF_type_node, NULL_TREE);
14874 tree void_ftype_pcvoid
14875 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14876 tree v4sf_ftype_v4si
14877 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14878 tree v4si_ftype_v4sf
14879 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14880 tree v2df_ftype_v4si
14881 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14882 tree v4si_ftype_v2df
14883 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14884 tree v2si_ftype_v2df
14885 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14886 tree v4sf_ftype_v2df
14887 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14888 tree v2df_ftype_v2si
14889 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14890 tree v2df_ftype_v4sf
14891 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14892 tree int_ftype_v2df
14893 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14894 tree int64_ftype_v2df
14895 = build_function_type_list (long_long_integer_type_node,
14896 V2DF_type_node, NULL_TREE);
14897 tree v2df_ftype_v2df_int
14898 = build_function_type_list (V2DF_type_node,
14899 V2DF_type_node, integer_type_node, NULL_TREE);
14900 tree v2df_ftype_v2df_int64
14901 = build_function_type_list (V2DF_type_node,
14902 V2DF_type_node, long_long_integer_type_node,
14903 NULL_TREE);
14904 tree v4sf_ftype_v4sf_v2df
14905 = build_function_type_list (V4SF_type_node,
14906 V4SF_type_node, V2DF_type_node, NULL_TREE);
14907 tree v2df_ftype_v2df_v4sf
14908 = build_function_type_list (V2DF_type_node,
14909 V2DF_type_node, V4SF_type_node, NULL_TREE);
14910 tree v2df_ftype_v2df_v2df_int
14911 = build_function_type_list (V2DF_type_node,
14912 V2DF_type_node, V2DF_type_node,
14913 integer_type_node,
14914 NULL_TREE);
14915 tree v2df_ftype_v2df_pcdouble
14916 = build_function_type_list (V2DF_type_node,
14917 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14918 tree void_ftype_pdouble_v2df
14919 = build_function_type_list (void_type_node,
14920 pdouble_type_node, V2DF_type_node, NULL_TREE);
14921 tree void_ftype_pint_int
14922 = build_function_type_list (void_type_node,
14923 pint_type_node, integer_type_node, NULL_TREE);
14924 tree void_ftype_v16qi_v16qi_pchar
14925 = build_function_type_list (void_type_node,
14926 V16QI_type_node, V16QI_type_node,
14927 pchar_type_node, NULL_TREE);
14928 tree v2df_ftype_pcdouble
14929 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14930 tree v2df_ftype_v2df_v2df
14931 = build_function_type_list (V2DF_type_node,
14932 V2DF_type_node, V2DF_type_node, NULL_TREE);
14933 tree v16qi_ftype_v16qi_v16qi
14934 = build_function_type_list (V16QI_type_node,
14935 V16QI_type_node, V16QI_type_node, NULL_TREE);
14936 tree v8hi_ftype_v8hi_v8hi
14937 = build_function_type_list (V8HI_type_node,
14938 V8HI_type_node, V8HI_type_node, NULL_TREE);
14939 tree v4si_ftype_v4si_v4si
14940 = build_function_type_list (V4SI_type_node,
14941 V4SI_type_node, V4SI_type_node, NULL_TREE);
14942 tree v2di_ftype_v2di_v2di
14943 = build_function_type_list (V2DI_type_node,
14944 V2DI_type_node, V2DI_type_node, NULL_TREE);
14945 tree v2di_ftype_v2df_v2df
14946 = build_function_type_list (V2DI_type_node,
14947 V2DF_type_node, V2DF_type_node, NULL_TREE);
14948 tree v2df_ftype_v2df
14949 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14950 tree v2di_ftype_v2di_int
14951 = build_function_type_list (V2DI_type_node,
14952 V2DI_type_node, integer_type_node, NULL_TREE);
14953 tree v4si_ftype_v4si_int
14954 = build_function_type_list (V4SI_type_node,
14955 V4SI_type_node, integer_type_node, NULL_TREE);
14956 tree v8hi_ftype_v8hi_int
14957 = build_function_type_list (V8HI_type_node,
14958 V8HI_type_node, integer_type_node, NULL_TREE);
14959 tree v8hi_ftype_v8hi_v2di
14960 = build_function_type_list (V8HI_type_node,
14961 V8HI_type_node, V2DI_type_node, NULL_TREE);
14962 tree v4si_ftype_v4si_v2di
14963 = build_function_type_list (V4SI_type_node,
14964 V4SI_type_node, V2DI_type_node, NULL_TREE);
14965 tree v4si_ftype_v8hi_v8hi
14966 = build_function_type_list (V4SI_type_node,
14967 V8HI_type_node, V8HI_type_node, NULL_TREE);
14968 tree di_ftype_v8qi_v8qi
14969 = build_function_type_list (long_long_unsigned_type_node,
14970 V8QI_type_node, V8QI_type_node, NULL_TREE);
14971 tree di_ftype_v2si_v2si
14972 = build_function_type_list (long_long_unsigned_type_node,
14973 V2SI_type_node, V2SI_type_node, NULL_TREE);
14974 tree v2di_ftype_v16qi_v16qi
14975 = build_function_type_list (V2DI_type_node,
14976 V16QI_type_node, V16QI_type_node, NULL_TREE);
14977 tree v2di_ftype_v4si_v4si
14978 = build_function_type_list (V2DI_type_node,
14979 V4SI_type_node, V4SI_type_node, NULL_TREE);
14980 tree int_ftype_v16qi
14981 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14982 tree v16qi_ftype_pcchar
14983 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14984 tree void_ftype_pchar_v16qi
14985 = build_function_type_list (void_type_node,
14986 pchar_type_node, V16QI_type_node, NULL_TREE);
14988 tree float80_type;
14989 tree float128_type;
14990 tree ftype;
14992 /* The __float80 type. */
14993 if (TYPE_MODE (long_double_type_node) == XFmode)
14994 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14995 "__float80");
14996 else
14998 /* The __float80 type. */
14999 float80_type = make_node (REAL_TYPE);
15000 TYPE_PRECISION (float80_type) = 80;
15001 layout_type (float80_type);
15002 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15005 if (TARGET_64BIT)
15007 float128_type = make_node (REAL_TYPE);
15008 TYPE_PRECISION (float128_type) = 128;
15009 layout_type (float128_type);
15010 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
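/* For illustration, the registrations above are what make the __float80
   and (on 64-bit targets) __float128 keywords visible to user code; a
   minimal usage sketch, assuming the usual x86 mapping of XFmode to the
   80-bit extended format:

       __float80  e = 1.0;      (80-bit extended precision)
       __float128 q = 2.0;      (TARGET_64BIT only)
*/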
15013 /* Add all builtins that are more or less simple operations on two
15014 operands. */
15015 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15017 /* Use one of the operands; the target can have a different mode for
15018 mask-generating compares. */
15019 enum machine_mode mode;
15020 tree type;
15022 if (d->name == 0)
15023 continue;
15024 mode = insn_data[d->icode].operand[1].mode;
15026 switch (mode)
15028 case V16QImode:
15029 type = v16qi_ftype_v16qi_v16qi;
15030 break;
15031 case V8HImode:
15032 type = v8hi_ftype_v8hi_v8hi;
15033 break;
15034 case V4SImode:
15035 type = v4si_ftype_v4si_v4si;
15036 break;
15037 case V2DImode:
15038 type = v2di_ftype_v2di_v2di;
15039 break;
15040 case V2DFmode:
15041 type = v2df_ftype_v2df_v2df;
15042 break;
15043 case V4SFmode:
15044 type = v4sf_ftype_v4sf_v4sf;
15045 break;
15046 case V8QImode:
15047 type = v8qi_ftype_v8qi_v8qi;
15048 break;
15049 case V4HImode:
15050 type = v4hi_ftype_v4hi_v4hi;
15051 break;
15052 case V2SImode:
15053 type = v2si_ftype_v2si_v2si;
15054 break;
15055 case DImode:
15056 type = di_ftype_di_di;
15057 break;
15059 default:
15060 gcc_unreachable ();
15063 /* Override for comparisons. */
15064 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15065 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15066 type = v4si_ftype_v4sf_v4sf;
15068 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15069 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15070 type = v2di_ftype_v2df_v2df;
15072 def_builtin (d->mask, d->name, type, d->code);
15075 /* Add the remaining MMX insns with somewhat more complicated types. */
15076 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15077 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15078 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15079 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15081 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15082 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15083 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15085 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15086 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15088 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15089 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
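/* Note: the MMX shift builtins just defined take their shift count as a
   64-bit (DImode) operand, which is why they use the *_di function types
   built above; this mirrors the psllw/pslld/psllq forms whose count comes
   from an MMX register rather than an immediate.  */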
15091 /* comi/ucomi insns. */
15092 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15093 if (d->mask == MASK_SSE2)
15094 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15095 else
15096 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15098 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15099 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15100 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15102 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15103 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15104 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15105 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15106 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15107 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15108 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15109 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15110 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15111 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15112 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15114 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15116 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15117 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15119 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15120 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15121 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15122 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15124 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15125 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15126 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15127 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15129 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15131 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15133 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15134 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15135 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15136 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15137 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15138 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15140 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15142 /* Original 3DNow! */
15143 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15144 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15145 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15146 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15147 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15148 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15149 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15150 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15151 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15152 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15153 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15154 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15155 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15156 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15157 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15158 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15159 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15160 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15161 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15162 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15164 /* 3DNow! extension as used in the Athlon CPU. */
15165 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15166 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15167 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15168 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15169 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15170 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15172 /* SSE2 */
15173 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15175 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15176 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15178 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15179 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15181 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15182 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15183 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15184 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15185 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15187 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15188 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15189 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15190 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15192 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15193 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15195 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15197 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15198 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15200 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15201 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15202 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15203 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15204 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15206 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15208 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15209 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15210 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15211 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15213 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15214 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15215 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15217 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15218 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15219 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15220 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15222 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15223 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15224 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15226 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15227 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15229 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15230 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15232 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15233 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15234 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15236 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15237 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15238 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15240 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15241 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15243 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15244 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15245 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15246 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15248 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15249 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15250 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15251 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15253 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15254 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15256 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15258 /* Prescott New Instructions. */
15259 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15260 void_ftype_pcvoid_unsigned_unsigned,
15261 IX86_BUILTIN_MONITOR);
15262 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15263 void_ftype_unsigned_unsigned,
15264 IX86_BUILTIN_MWAIT);
15265 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15266 v4sf_ftype_v4sf,
15267 IX86_BUILTIN_MOVSHDUP);
15268 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15269 v4sf_ftype_v4sf,
15270 IX86_BUILTIN_MOVSLDUP);
15271 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15272 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
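/* These SSE3 builtins are normally reached through the pmmintrin.h
   wrappers; a rough sketch of how the header is expected to forward to
   them (the exact wrapper text may differ):

       void _mm_monitor (void const *p, unsigned int e, unsigned int h)
         { __builtin_ia32_monitor (p, e, h); }
       void _mm_mwait (unsigned int e, unsigned int h)
         { __builtin_ia32_mwait (e, h); }

   The corresponding expansion is handled by the IX86_BUILTIN_MONITOR and
   IX86_BUILTIN_MWAIT cases in ix86_expand_builtin.  */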
15274 /* Access to the vec_init patterns. */
15275 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15276 integer_type_node, NULL_TREE);
15277 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15278 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15280 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15281 short_integer_type_node,
15282 short_integer_type_node,
15283 short_integer_type_node, NULL_TREE);
15284 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15285 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15287 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15288 char_type_node, char_type_node,
15289 char_type_node, char_type_node,
15290 char_type_node, char_type_node,
15291 char_type_node, NULL_TREE);
15292 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15293 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15295 /* Access to the vec_extract patterns. */
15296 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15297 integer_type_node, NULL_TREE);
15298 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15299 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15301 ftype = build_function_type_list (long_long_integer_type_node,
15302 V2DI_type_node, integer_type_node,
15303 NULL_TREE);
15304 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15305 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15307 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15308 integer_type_node, NULL_TREE);
15309 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15310 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15312 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15313 integer_type_node, NULL_TREE);
15314 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15315 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15317 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15318 integer_type_node, NULL_TREE);
15319 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15320 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15322 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15323 integer_type_node, NULL_TREE);
15324 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15325 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15327 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15328 integer_type_node, NULL_TREE);
15329 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15330 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15332 /* Access to the vec_set patterns. */
15333 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15334 intHI_type_node,
15335 integer_type_node, NULL_TREE);
15336 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15337 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15339 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15340 intHI_type_node,
15341 integer_type_node, NULL_TREE);
15342 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15343 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15345 #undef def_builtin
15347 /* Set up all the SSE ABI builtins that we may use to override
15348 the normal builtins. */
15349 static void
15350 ix86_init_sse_abi_builtins (void)
15352 tree flt, flt2;
15354 /* Bail out in case the template definitions are not available. */
15355 if (! built_in_decls [BUILT_IN_SIN]
15356 || ! built_in_decls [BUILT_IN_SINF]
15357 || ! built_in_decls [BUILT_IN_ATAN2]
15358 || ! built_in_decls [BUILT_IN_ATAN2F])
15359 return;
15361 /* Build the function types as variants of the existing ones. */
15362 flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
15363 TYPE_ATTRIBUTES (flt)
15364 = tree_cons (get_identifier ("sseregparm"),
15365 NULL_TREE, TYPE_ATTRIBUTES (flt));
15366 flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
15367 TYPE_ATTRIBUTES (flt2)
15368 = tree_cons (get_identifier ("sseregparm"),
15369 NULL_TREE, TYPE_ATTRIBUTES (flt2));
15371 #define def_builtin(capname, name, type) \
15372 ix86_builtin_function_variants [BUILT_IN_ ## capname] \
15373 = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
15374 IX86_BUILTIN_SSE2_ ## capname, \
15375 BUILT_IN_NORMAL, \
15376 "__libm_sse2_" # name, NULL_TREE)
15378 def_builtin (ACOSF, acosf, flt);
15379 def_builtin (ASINF, asinf, flt);
15380 def_builtin (ATANF, atanf, flt);
15381 def_builtin (ATAN2F, atan2f, flt2);
15382 def_builtin (COSF, cosf, flt);
15383 def_builtin (EXPF, expf, flt);
15384 def_builtin (LOG10F, log10f, flt);
15385 def_builtin (LOGF, logf, flt);
15386 def_builtin (SINF, sinf, flt);
15387 def_builtin (TANF, tanf, flt);
15389 #undef def_builtin
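/* The table built above is consumed by ix86_expand_library_builtin
   further down: when TARGET_SSELIBM is enabled, a call to one of the
   listed math functions is re-expanded as a call to the matching
   __libm_sse2_* entry point using the sseregparm calling convention.
   A hedged sketch of the user-visible effect:

       float f (float x) { return sinf (x); }

   would end up calling __libm_sse2_sinf instead of the plain libm sinf.  */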
15392 /* Errors in the source file can cause expand_expr to return const0_rtx
15393 where we expect a vector. To avoid crashing, use one of the vector
15394 clear instructions. */
15395 static rtx
15396 safe_vector_operand (rtx x, enum machine_mode mode)
15398 if (x == const0_rtx)
15399 x = CONST0_RTX (mode);
15400 return x;
15403 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15405 static rtx
15406 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15408 rtx pat, xops[3];
15409 tree arg0 = TREE_VALUE (arglist);
15410 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15411 rtx op0 = expand_normal (arg0);
15412 rtx op1 = expand_normal (arg1);
15413 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15414 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15415 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15417 if (VECTOR_MODE_P (mode0))
15418 op0 = safe_vector_operand (op0, mode0);
15419 if (VECTOR_MODE_P (mode1))
15420 op1 = safe_vector_operand (op1, mode1);
15422 if (optimize || !target
15423 || GET_MODE (target) != tmode
15424 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15425 target = gen_reg_rtx (tmode);
15427 if (GET_MODE (op1) == SImode && mode1 == TImode)
15429 rtx x = gen_reg_rtx (V4SImode);
15430 emit_insn (gen_sse2_loadd (x, op1));
15431 op1 = gen_lowpart (TImode, x);
15434 /* The insn must want input operands in the same modes as the
15435 result. */
15436 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15437 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15439 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15440 op0 = copy_to_mode_reg (mode0, op0);
15441 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15442 op1 = copy_to_mode_reg (mode1, op1);
15444 /* ??? Using ix86_fixup_binary_operands is problematic when
15445 we've got mismatched modes. Fake it. */
15447 xops[0] = target;
15448 xops[1] = op0;
15449 xops[2] = op1;
15451 if (tmode == mode0 && tmode == mode1)
15453 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15454 op0 = xops[1];
15455 op1 = xops[2];
15457 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15459 op0 = force_reg (mode0, op0);
15460 op1 = force_reg (mode1, op1);
15461 target = gen_reg_rtx (tmode);
15464 pat = GEN_FCN (icode) (target, op0, op1);
15465 if (! pat)
15466 return 0;
15467 emit_insn (pat);
15468 return target;
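/* For context, the two-operand builtins expanded here come from the
   bdesc_2arg table registered in ix86_init_mmx_sse_builtins; user code
   normally reaches them through the intrinsic headers.  A rough sketch of
   one such wrapper (the exact header text may differ):

       __m128 _mm_add_ps (__m128 a, __m128 b)
       {
         return (__m128) __builtin_ia32_addps ((__v4sf) a, (__v4sf) b);
       }

   That builtin call is what arrives here with the insn code recorded in
   its descriptor.  */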
15471 /* Subroutine of ix86_expand_builtin to take care of stores. */
15473 static rtx
15474 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15476 rtx pat;
15477 tree arg0 = TREE_VALUE (arglist);
15478 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15479 rtx op0 = expand_normal (arg0);
15480 rtx op1 = expand_normal (arg1);
15481 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15482 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15484 if (VECTOR_MODE_P (mode1))
15485 op1 = safe_vector_operand (op1, mode1);
15487 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15488 op1 = copy_to_mode_reg (mode1, op1);
15490 pat = GEN_FCN (icode) (op0, op1);
15491 if (pat)
15492 emit_insn (pat);
15493 return 0;
15496 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15498 static rtx
15499 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15500 rtx target, int do_load)
15502 rtx pat;
15503 tree arg0 = TREE_VALUE (arglist);
15504 rtx op0 = expand_normal (arg0);
15505 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15506 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15508 if (optimize || !target
15509 || GET_MODE (target) != tmode
15510 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15511 target = gen_reg_rtx (tmode);
15512 if (do_load)
15513 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15514 else
15516 if (VECTOR_MODE_P (mode0))
15517 op0 = safe_vector_operand (op0, mode0);
15519 if ((optimize && !register_operand (op0, mode0))
15520 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15521 op0 = copy_to_mode_reg (mode0, op0);
15524 pat = GEN_FCN (icode) (target, op0);
15525 if (! pat)
15526 return 0;
15527 emit_insn (pat);
15528 return target;
15531 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15532 sqrtss, rsqrtss, rcpss. */
15534 static rtx
15535 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15537 rtx pat;
15538 tree arg0 = TREE_VALUE (arglist);
15539 rtx op1, op0 = expand_normal (arg0);
15540 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15541 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15543 if (optimize || !target
15544 || GET_MODE (target) != tmode
15545 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15546 target = gen_reg_rtx (tmode);
15548 if (VECTOR_MODE_P (mode0))
15549 op0 = safe_vector_operand (op0, mode0);
15551 if ((optimize && !register_operand (op0, mode0))
15552 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15553 op0 = copy_to_mode_reg (mode0, op0);
15555 op1 = op0;
15556 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15557 op1 = copy_to_mode_reg (mode0, op1);
15559 pat = GEN_FCN (icode) (target, op0, op1);
15560 if (! pat)
15561 return 0;
15562 emit_insn (pat);
15563 return target;
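/* The duplicated operand above is intentional: the vm* patterns used for
   sqrtss, rsqrtss and rcpss take a second V4SF input that supplies the
   upper three elements of the result, so passing the same register for
   both inputs computes only element 0 and leaves the rest unchanged.  */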
15566 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15568 static rtx
15569 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15570 rtx target)
15572 rtx pat;
15573 tree arg0 = TREE_VALUE (arglist);
15574 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15575 rtx op0 = expand_normal (arg0);
15576 rtx op1 = expand_normal (arg1);
15577 rtx op2;
15578 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15579 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15580 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15581 enum rtx_code comparison = d->comparison;
15583 if (VECTOR_MODE_P (mode0))
15584 op0 = safe_vector_operand (op0, mode0);
15585 if (VECTOR_MODE_P (mode1))
15586 op1 = safe_vector_operand (op1, mode1);
15588 /* Swap operands if we have a comparison that isn't available in
15589 hardware. */
15590 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15592 rtx tmp = gen_reg_rtx (mode1);
15593 emit_move_insn (tmp, op1);
15594 op1 = op0;
15595 op0 = tmp;
15598 if (optimize || !target
15599 || GET_MODE (target) != tmode
15600 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15601 target = gen_reg_rtx (tmode);
15603 if ((optimize && !register_operand (op0, mode0))
15604 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15605 op0 = copy_to_mode_reg (mode0, op0);
15606 if ((optimize && !register_operand (op1, mode1))
15607 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15608 op1 = copy_to_mode_reg (mode1, op1);
15610 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15611 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15612 if (! pat)
15613 return 0;
15614 emit_insn (pat);
15615 return target;
15618 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15620 static rtx
15621 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15622 rtx target)
15624 rtx pat;
15625 tree arg0 = TREE_VALUE (arglist);
15626 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15627 rtx op0 = expand_normal (arg0);
15628 rtx op1 = expand_normal (arg1);
15629 rtx op2;
15630 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15631 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15632 enum rtx_code comparison = d->comparison;
15634 if (VECTOR_MODE_P (mode0))
15635 op0 = safe_vector_operand (op0, mode0);
15636 if (VECTOR_MODE_P (mode1))
15637 op1 = safe_vector_operand (op1, mode1);
15639 /* Swap operands if we have a comparison that isn't available in
15640 hardware. */
15641 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15643 rtx tmp = op1;
15644 op1 = op0;
15645 op0 = tmp;
15648 target = gen_reg_rtx (SImode);
15649 emit_move_insn (target, const0_rtx);
15650 target = gen_rtx_SUBREG (QImode, target, 0);
15652 if ((optimize && !register_operand (op0, mode0))
15653 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15654 op0 = copy_to_mode_reg (mode0, op0);
15655 if ((optimize && !register_operand (op1, mode1))
15656 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15657 op1 = copy_to_mode_reg (mode1, op1);
15659 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15660 pat = GEN_FCN (d->icode) (op0, op1);
15661 if (! pat)
15662 return 0;
15663 emit_insn (pat);
15664 emit_insn (gen_rtx_SET (VOIDmode,
15665 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15666 gen_rtx_fmt_ee (comparison, QImode,
15667 SET_DEST (pat),
15668 const0_rtx)));
15670 return SUBREG_REG (target);
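/* Note on the sequence above: the comi/ucomi pattern only sets the flags,
   so the 0/1 result is produced by pre-clearing an SImode pseudo and then
   storing the comparison outcome into its low byte through the
   STRICT_LOW_PART of a QImode subreg; SUBREG_REG recovers the full SImode
   value that is returned.  */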
15673 /* Return the integer constant in ARG. Constrain it to be in the range
15674 of the subparts of VEC_TYPE; issue an error if not. */
15676 static int
15677 get_element_number (tree vec_type, tree arg)
15679 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15681 if (!host_integerp (arg, 1)
15682 || (elt = tree_low_cst (arg, 1), elt > max))
15684 error ("selector must be an integer constant in the range 0..%wi", max);
15685 return 0;
15688 return elt;
15691 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15692 ix86_expand_vector_init. We DO have language-level syntax for this, in
15693 the form of (type){ init-list }. Except that since we can't place emms
15694 instructions from inside the compiler, we can't allow the use of MMX
15695 registers unless the user explicitly asks for it. So we do *not* define
15696 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15697 we have builtins invoked by mmintrin.h that give us license to emit
15698 these sorts of instructions. */
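/* A concrete sketch of the wrapping described above (the exact
   mmintrin.h text may differ):

       __m64 _mm_set_pi32 (int i1, int i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (i0, i1);
       }

   It is that builtin call which reaches ix86_expand_vec_init_builtin.  */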
15700 static rtx
15701 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15703 enum machine_mode tmode = TYPE_MODE (type);
15704 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15705 int i, n_elt = GET_MODE_NUNITS (tmode);
15706 rtvec v = rtvec_alloc (n_elt);
15708 gcc_assert (VECTOR_MODE_P (tmode));
15710 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15712 rtx x = expand_normal (TREE_VALUE (arglist));
15713 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15716 gcc_assert (arglist == NULL);
15718 if (!target || !register_operand (target, tmode))
15719 target = gen_reg_rtx (tmode);
15721 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15722 return target;
15725 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15726 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15727 had a language-level syntax for referencing vector elements. */
15729 static rtx
15730 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15732 enum machine_mode tmode, mode0;
15733 tree arg0, arg1;
15734 int elt;
15735 rtx op0;
15737 arg0 = TREE_VALUE (arglist);
15738 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15740 op0 = expand_normal (arg0);
15741 elt = get_element_number (TREE_TYPE (arg0), arg1);
15743 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15744 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15745 gcc_assert (VECTOR_MODE_P (mode0));
15747 op0 = force_reg (mode0, op0);
15749 if (optimize || !target || !register_operand (target, tmode))
15750 target = gen_reg_rtx (tmode);
15752 ix86_expand_vector_extract (true, target, op0, elt);
15754 return target;
15757 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15758 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15759 a language-level syntax for referencing vector elements. */
15761 static rtx
15762 ix86_expand_vec_set_builtin (tree arglist)
15764 enum machine_mode tmode, mode1;
15765 tree arg0, arg1, arg2;
15766 int elt;
15767 rtx op0, op1;
15769 arg0 = TREE_VALUE (arglist);
15770 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15771 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15773 tmode = TYPE_MODE (TREE_TYPE (arg0));
15774 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15775 gcc_assert (VECTOR_MODE_P (tmode));
15777 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15778 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15779 elt = get_element_number (TREE_TYPE (arg0), arg2);
15781 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15782 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15784 op0 = force_reg (tmode, op0);
15785 op1 = force_reg (mode1, op1);
15787 ix86_expand_vector_set (true, op0, op1, elt);
15789 return op0;
15792 /* Expand an expression EXP that calls a built-in function,
15793 with result going to TARGET if that's convenient
15794 (and in mode MODE if that's convenient).
15795 SUBTARGET may be used as the target for computing one of EXP's operands.
15796 IGNORE is nonzero if the value is to be ignored. */
15798 static rtx
15799 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15800 enum machine_mode mode ATTRIBUTE_UNUSED,
15801 int ignore ATTRIBUTE_UNUSED)
15803 const struct builtin_description *d;
15804 size_t i;
15805 enum insn_code icode;
15806 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15807 tree arglist = TREE_OPERAND (exp, 1);
15808 tree arg0, arg1, arg2;
15809 rtx op0, op1, op2, pat;
15810 enum machine_mode tmode, mode0, mode1, mode2;
15811 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15813 switch (fcode)
15815 case IX86_BUILTIN_EMMS:
15816 emit_insn (gen_mmx_emms ());
15817 return 0;
15819 case IX86_BUILTIN_SFENCE:
15820 emit_insn (gen_sse_sfence ());
15821 return 0;
15823 case IX86_BUILTIN_MASKMOVQ:
15824 case IX86_BUILTIN_MASKMOVDQU:
15825 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15826 ? CODE_FOR_mmx_maskmovq
15827 : CODE_FOR_sse2_maskmovdqu);
15828 /* Note the arg order is different from the operand order. */
15829 arg1 = TREE_VALUE (arglist);
15830 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15831 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15832 op0 = expand_normal (arg0);
15833 op1 = expand_normal (arg1);
15834 op2 = expand_normal (arg2);
15835 mode0 = insn_data[icode].operand[0].mode;
15836 mode1 = insn_data[icode].operand[1].mode;
15837 mode2 = insn_data[icode].operand[2].mode;
15839 op0 = force_reg (Pmode, op0);
15840 op0 = gen_rtx_MEM (mode1, op0);
15842 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15843 op0 = copy_to_mode_reg (mode0, op0);
15844 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15845 op1 = copy_to_mode_reg (mode1, op1);
15846 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15847 op2 = copy_to_mode_reg (mode2, op2);
15848 pat = GEN_FCN (icode) (op0, op1, op2);
15849 if (! pat)
15850 return 0;
15851 emit_insn (pat);
15852 return 0;
15854 case IX86_BUILTIN_SQRTSS:
15855 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15856 case IX86_BUILTIN_RSQRTSS:
15857 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15858 case IX86_BUILTIN_RCPSS:
15859 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15861 case IX86_BUILTIN_LOADUPS:
15862 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15864 case IX86_BUILTIN_STOREUPS:
15865 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15867 case IX86_BUILTIN_LOADHPS:
15868 case IX86_BUILTIN_LOADLPS:
15869 case IX86_BUILTIN_LOADHPD:
15870 case IX86_BUILTIN_LOADLPD:
15871 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15872 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15873 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15874 : CODE_FOR_sse2_loadlpd);
15875 arg0 = TREE_VALUE (arglist);
15876 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15877 op0 = expand_normal (arg0);
15878 op1 = expand_normal (arg1);
15879 tmode = insn_data[icode].operand[0].mode;
15880 mode0 = insn_data[icode].operand[1].mode;
15881 mode1 = insn_data[icode].operand[2].mode;
15883 op0 = force_reg (mode0, op0);
15884 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15885 if (optimize || target == 0
15886 || GET_MODE (target) != tmode
15887 || !register_operand (target, tmode))
15888 target = gen_reg_rtx (tmode);
15889 pat = GEN_FCN (icode) (target, op0, op1);
15890 if (! pat)
15891 return 0;
15892 emit_insn (pat);
15893 return target;
15895 case IX86_BUILTIN_STOREHPS:
15896 case IX86_BUILTIN_STORELPS:
15897 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15898 : CODE_FOR_sse_storelps);
15899 arg0 = TREE_VALUE (arglist);
15900 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15901 op0 = expand_normal (arg0);
15902 op1 = expand_normal (arg1);
15903 mode0 = insn_data[icode].operand[0].mode;
15904 mode1 = insn_data[icode].operand[1].mode;
15906 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15907 op1 = force_reg (mode1, op1);
15909 pat = GEN_FCN (icode) (op0, op1);
15910 if (! pat)
15911 return 0;
15912 emit_insn (pat);
15913 return const0_rtx;
15915 case IX86_BUILTIN_MOVNTPS:
15916 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15917 case IX86_BUILTIN_MOVNTQ:
15918 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15920 case IX86_BUILTIN_LDMXCSR:
15921 op0 = expand_normal (TREE_VALUE (arglist));
15922 target = assign_386_stack_local (SImode, SLOT_TEMP);
15923 emit_move_insn (target, op0);
15924 emit_insn (gen_sse_ldmxcsr (target));
15925 return 0;
15927 case IX86_BUILTIN_STMXCSR:
15928 target = assign_386_stack_local (SImode, SLOT_TEMP);
15929 emit_insn (gen_sse_stmxcsr (target));
15930 return copy_to_mode_reg (SImode, target);
15932 case IX86_BUILTIN_SHUFPS:
15933 case IX86_BUILTIN_SHUFPD:
15934 icode = (fcode == IX86_BUILTIN_SHUFPS
15935 ? CODE_FOR_sse_shufps
15936 : CODE_FOR_sse2_shufpd);
15937 arg0 = TREE_VALUE (arglist);
15938 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15939 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15940 op0 = expand_normal (arg0);
15941 op1 = expand_normal (arg1);
15942 op2 = expand_normal (arg2);
15943 tmode = insn_data[icode].operand[0].mode;
15944 mode0 = insn_data[icode].operand[1].mode;
15945 mode1 = insn_data[icode].operand[2].mode;
15946 mode2 = insn_data[icode].operand[3].mode;
15948 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15949 op0 = copy_to_mode_reg (mode0, op0);
15950 if ((optimize && !register_operand (op1, mode1))
15951 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15952 op1 = copy_to_mode_reg (mode1, op1);
15953 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15955 /* @@@ better error message */
15956 error ("mask must be an immediate");
15957 return gen_reg_rtx (tmode);
15959 if (optimize || target == 0
15960 || GET_MODE (target) != tmode
15961 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15962 target = gen_reg_rtx (tmode);
15963 pat = GEN_FCN (icode) (target, op0, op1, op2);
15964 if (! pat)
15965 return 0;
15966 emit_insn (pat);
15967 return target;
15969 case IX86_BUILTIN_PSHUFW:
15970 case IX86_BUILTIN_PSHUFD:
15971 case IX86_BUILTIN_PSHUFHW:
15972 case IX86_BUILTIN_PSHUFLW:
15973 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15974 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15975 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15976 : CODE_FOR_mmx_pshufw);
15977 arg0 = TREE_VALUE (arglist);
15978 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15979 op0 = expand_normal (arg0);
15980 op1 = expand_normal (arg1);
15981 tmode = insn_data[icode].operand[0].mode;
15982 mode1 = insn_data[icode].operand[1].mode;
15983 mode2 = insn_data[icode].operand[2].mode;
15985 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15986 op0 = copy_to_mode_reg (mode1, op0);
15987 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15989 /* @@@ better error message */
15990 error ("mask must be an immediate");
15991 return const0_rtx;
15993 if (target == 0
15994 || GET_MODE (target) != tmode
15995 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15996 target = gen_reg_rtx (tmode);
15997 pat = GEN_FCN (icode) (target, op0, op1);
15998 if (! pat)
15999 return 0;
16000 emit_insn (pat);
16001 return target;
16003 case IX86_BUILTIN_PSLLDQI128:
16004 case IX86_BUILTIN_PSRLDQI128:
16005 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16006 : CODE_FOR_sse2_lshrti3);
16007 arg0 = TREE_VALUE (arglist);
16008 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16009 op0 = expand_normal (arg0);
16010 op1 = expand_normal (arg1);
16011 tmode = insn_data[icode].operand[0].mode;
16012 mode1 = insn_data[icode].operand[1].mode;
16013 mode2 = insn_data[icode].operand[2].mode;
16015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16017 op0 = copy_to_reg (op0);
16018 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16020 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16022 error ("shift must be an immediate");
16023 return const0_rtx;
16025 target = gen_reg_rtx (V2DImode);
16026 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16027 if (! pat)
16028 return 0;
16029 emit_insn (pat);
16030 return target;
16032 case IX86_BUILTIN_FEMMS:
16033 emit_insn (gen_mmx_femms ());
16034 return NULL_RTX;
16036 case IX86_BUILTIN_PAVGUSB:
16037 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16039 case IX86_BUILTIN_PF2ID:
16040 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16042 case IX86_BUILTIN_PFACC:
16043 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16045 case IX86_BUILTIN_PFADD:
16046 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16048 case IX86_BUILTIN_PFCMPEQ:
16049 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16051 case IX86_BUILTIN_PFCMPGE:
16052 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16054 case IX86_BUILTIN_PFCMPGT:
16055 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16057 case IX86_BUILTIN_PFMAX:
16058 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16060 case IX86_BUILTIN_PFMIN:
16061 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16063 case IX86_BUILTIN_PFMUL:
16064 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16066 case IX86_BUILTIN_PFRCP:
16067 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16069 case IX86_BUILTIN_PFRCPIT1:
16070 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16072 case IX86_BUILTIN_PFRCPIT2:
16073 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16075 case IX86_BUILTIN_PFRSQIT1:
16076 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16078 case IX86_BUILTIN_PFRSQRT:
16079 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16081 case IX86_BUILTIN_PFSUB:
16082 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16084 case IX86_BUILTIN_PFSUBR:
16085 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16087 case IX86_BUILTIN_PI2FD:
16088 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16090 case IX86_BUILTIN_PMULHRW:
16091 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16093 case IX86_BUILTIN_PF2IW:
16094 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16096 case IX86_BUILTIN_PFNACC:
16097 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16099 case IX86_BUILTIN_PFPNACC:
16100 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16102 case IX86_BUILTIN_PI2FW:
16103 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16105 case IX86_BUILTIN_PSWAPDSI:
16106 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16108 case IX86_BUILTIN_PSWAPDSF:
16109 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16111 case IX86_BUILTIN_SQRTSD:
16112 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16113 case IX86_BUILTIN_LOADUPD:
16114 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16115 case IX86_BUILTIN_STOREUPD:
16116 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16118 case IX86_BUILTIN_MFENCE:
16119 emit_insn (gen_sse2_mfence ());
16120 return 0;
16121 case IX86_BUILTIN_LFENCE:
16122 emit_insn (gen_sse2_lfence ());
16123 return 0;
16125 case IX86_BUILTIN_CLFLUSH:
16126 arg0 = TREE_VALUE (arglist);
16127 op0 = expand_normal (arg0);
16128 icode = CODE_FOR_sse2_clflush;
16129 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16130 op0 = copy_to_mode_reg (Pmode, op0);
16132 emit_insn (gen_sse2_clflush (op0));
16133 return 0;
16135 case IX86_BUILTIN_MOVNTPD:
16136 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16137 case IX86_BUILTIN_MOVNTDQ:
16138 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16139 case IX86_BUILTIN_MOVNTI:
16140 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16142 case IX86_BUILTIN_LOADDQU:
16143 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16144 case IX86_BUILTIN_STOREDQU:
16145 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16147 case IX86_BUILTIN_MONITOR:
16148 arg0 = TREE_VALUE (arglist);
16149 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16150 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16151 op0 = expand_normal (arg0);
16152 op1 = expand_normal (arg1);
16153 op2 = expand_normal (arg2);
16154 if (!REG_P (op0))
16155 op0 = copy_to_mode_reg (SImode, op0);
16156 if (!REG_P (op1))
16157 op1 = copy_to_mode_reg (SImode, op1);
16158 if (!REG_P (op2))
16159 op2 = copy_to_mode_reg (SImode, op2);
16160 emit_insn (gen_sse3_monitor (op0, op1, op2));
16161 return 0;
16163 case IX86_BUILTIN_MWAIT:
16164 arg0 = TREE_VALUE (arglist);
16165 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16166 op0 = expand_normal (arg0);
16167 op1 = expand_normal (arg1);
16168 if (!REG_P (op0))
16169 op0 = copy_to_mode_reg (SImode, op0);
16170 if (!REG_P (op1))
16171 op1 = copy_to_mode_reg (SImode, op1);
16172 emit_insn (gen_sse3_mwait (op0, op1));
16173 return 0;
16175 case IX86_BUILTIN_LDDQU:
16176 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16177 target, 1);
16179 case IX86_BUILTIN_VEC_INIT_V2SI:
16180 case IX86_BUILTIN_VEC_INIT_V4HI:
16181 case IX86_BUILTIN_VEC_INIT_V8QI:
16182 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16184 case IX86_BUILTIN_VEC_EXT_V2DF:
16185 case IX86_BUILTIN_VEC_EXT_V2DI:
16186 case IX86_BUILTIN_VEC_EXT_V4SF:
16187 case IX86_BUILTIN_VEC_EXT_V4SI:
16188 case IX86_BUILTIN_VEC_EXT_V8HI:
16189 case IX86_BUILTIN_VEC_EXT_V2SI:
16190 case IX86_BUILTIN_VEC_EXT_V4HI:
16191 return ix86_expand_vec_ext_builtin (arglist, target);
16193 case IX86_BUILTIN_VEC_SET_V8HI:
16194 case IX86_BUILTIN_VEC_SET_V4HI:
16195 return ix86_expand_vec_set_builtin (arglist);
16197 default:
16198 break;
16201 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16202 if (d->code == fcode)
16204 /* Compares are treated specially. */
16205 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16206 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16207 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16208 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16209 return ix86_expand_sse_compare (d, arglist, target);
16211 return ix86_expand_binop_builtin (d->icode, arglist, target);
16214 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16215 if (d->code == fcode)
16216 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16218 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16219 if (d->code == fcode)
16220 return ix86_expand_sse_comi (d, arglist, target);
16222 gcc_unreachable ();
16225 /* Expand an expression EXP that calls a built-in library function,
16226 with result going to TARGET if that's convenient
16227 (and in mode MODE if that's convenient).
16228 SUBTARGET may be used as the target for computing one of EXP's operands.
16229 IGNORE is nonzero if the value is to be ignored. */
16231 static rtx
16232 ix86_expand_library_builtin (tree exp, rtx target,
16233 rtx subtarget ATTRIBUTE_UNUSED,
16234 enum machine_mode mode ATTRIBUTE_UNUSED,
16235 int ignore)
16237 enum built_in_function fncode;
16238 tree fndecl, newfn, call;
16240 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16241 if (!TARGET_SSELIBM)
16242 return NULL_RTX;
16244 fncode = builtin_mathfn_code (exp);
16245 if (!ix86_builtin_function_variants [(int)fncode])
16246 return NULL_RTX;
16248 fndecl = get_callee_fndecl (exp);
16249 if (DECL_RTL_SET_P (fndecl))
16250 return NULL_RTX;
16252 /* Build the redirected call and expand it. */
16253 newfn = ix86_builtin_function_variants [(int)fncode];
16254 call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
16255 return expand_call (call, target, ignore);
16258 /* Store OPERAND to the memory after reload is completed. This means
16259 that we can't easily use assign_stack_local. */
16261 rtx ix86_force_to_memory (enum machine_mode mode, rtx operand)
16263 rtx result;
16265 gcc_assert (reload_completed);
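/* Three cases follow: with a red zone we can simply store just below the
   stack pointer; on 64-bit without a red zone we push the value (widening
   HImode/SImode to DImode); on 32-bit we push one or two SImode words.  */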
16266 if (TARGET_RED_ZONE)
16268 result = gen_rtx_MEM (mode,
16269 gen_rtx_PLUS (Pmode,
16270 stack_pointer_rtx,
16271 GEN_INT (-RED_ZONE_SIZE)));
16272 emit_move_insn (result, operand);
16274 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16276 switch (mode)
16278 case HImode:
16279 case SImode:
16280 operand = gen_lowpart (DImode, operand);
16281 /* FALLTHRU */
16282 case DImode:
16283 emit_insn (
16284 gen_rtx_SET (VOIDmode,
16285 gen_rtx_MEM (DImode,
16286 gen_rtx_PRE_DEC (DImode,
16287 stack_pointer_rtx)),
16288 operand));
16289 break;
16290 default:
16291 gcc_unreachable ();
16293 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16295 else
16297 switch (mode)
16299 case DImode:
16301 rtx operands[2];
16302 split_di (&operand, 1, operands, operands + 1);
16303 emit_insn (
16304 gen_rtx_SET (VOIDmode,
16305 gen_rtx_MEM (SImode,
16306 gen_rtx_PRE_DEC (Pmode,
16307 stack_pointer_rtx)),
16308 operands[1]));
16309 emit_insn (
16310 gen_rtx_SET (VOIDmode,
16311 gen_rtx_MEM (SImode,
16312 gen_rtx_PRE_DEC (Pmode,
16313 stack_pointer_rtx)),
16314 operands[0]));
16316 break;
16317 case HImode:
16318 /* Store HImodes as SImodes. */
16319 operand = gen_lowpart (SImode, operand);
16320 /* FALLTHRU */
16321 case SImode:
16322 emit_insn (
16323 gen_rtx_SET (VOIDmode,
16324 gen_rtx_MEM (GET_MODE (operand),
16325 gen_rtx_PRE_DEC (SImode,
16326 stack_pointer_rtx)),
16327 operand));
16328 break;
16329 default:
16330 gcc_unreachable ();
16332 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16334 return result;
16337 /* Free operand from the memory. */
16338 void
16339 ix86_free_from_memory (enum machine_mode mode)
16341 if (!TARGET_RED_ZONE)
16343 int size;
16345 if (mode == DImode || TARGET_64BIT)
16346 size = 8;
16347 else
16348 size = 4;
16349 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16350 to pop or add instruction if registers are available. */
16351 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16352 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16353 GEN_INT (size))));
16357 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16358 QImode must go into class Q_REGS.
16359 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16360 movdf to do mem-to-mem moves through integer regs. */
16361 enum reg_class
16362 ix86_preferred_reload_class (rtx x, enum reg_class class)
16364 enum machine_mode mode = GET_MODE (x);
16366 /* We're only allowed to return a subclass of CLASS. Many of the
16367 following checks fail for NO_REGS, so eliminate that early. */
16368 if (class == NO_REGS)
16369 return NO_REGS;
16371 /* All classes can load zeros. */
16372 if (x == CONST0_RTX (mode))
16373 return class;
16375 /* Force constants into memory if we are loading a (nonzero) constant into
16376 an MMX or SSE register. This is because there are no MMX/SSE instructions
16377 to load from a constant. */
16378 if (CONSTANT_P (x)
16379 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16380 return NO_REGS;
16382 /* Prefer SSE regs only, if we can use them for math. */
16383 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16384 return SSE_CLASS_P (class) ? class : NO_REGS;
16386 /* Floating-point constants need more complex checks. */
16387 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16389 /* General regs can load everything. */
16390 if (reg_class_subset_p (class, GENERAL_REGS))
16391 return class;
16393 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16394 zero above. We only want to wind up preferring 80387 registers if
16395 we plan on doing computation with them. */
16396 if (TARGET_80387
16397 && standard_80387_constant_p (x))
16399 /* Limit class to non-sse. */
16400 if (class == FLOAT_SSE_REGS)
16401 return FLOAT_REGS;
16402 if (class == FP_TOP_SSE_REGS)
16403 return FP_TOP_REG;
16404 if (class == FP_SECOND_SSE_REGS)
16405 return FP_SECOND_REG;
16406 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16407 return class;
16410 return NO_REGS;
16413 /* Generally when we see PLUS here, it's the function invariant
16414 (plus soft-fp const_int), which can only be computed into general
16415 regs. */
16416 if (GET_CODE (x) == PLUS)
16417 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16419 /* QImode constants are easy to load, but non-constant QImode data
16420 must go into Q_REGS. */
16421 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16423 if (reg_class_subset_p (class, Q_REGS))
16424 return class;
16425 if (reg_class_subset_p (Q_REGS, class))
16426 return Q_REGS;
16427 return NO_REGS;
16430 return class;
16433 /* Discourage putting floating-point values in SSE registers unless
16434 SSE math is being used, and likewise for the 387 registers. */
16435 enum reg_class
16436 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16438 enum machine_mode mode = GET_MODE (x);
16440 /* Restrict the output reload class to the register bank that we are doing
16441 math on. If we would like not to return a subset of CLASS, reject this
16442 alternative: if reload cannot do this, it will still use its choice. */
16443 mode = GET_MODE (x);
16444 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16445 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16447 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16449 if (class == FP_TOP_SSE_REGS)
16450 return FP_TOP_REG;
16451 else if (class == FP_SECOND_SSE_REGS)
16452 return FP_SECOND_REG;
16453 else
16454 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16457 return class;
16460 /* If we are copying between general and FP registers, we need a memory
16461 location. The same is true for SSE and MMX registers.
16463 The macro can't work reliably when one of the CLASSES is a class containing
16464 registers from multiple units (SSE, MMX, integer). We avoid this by never
16465 combining those units in a single alternative in the machine description.
16466 Ensure that this constraint holds to avoid unexpected surprises.
16468 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16469 enforce these sanity checks. */
16472 int ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16473 enum machine_mode mode, int strict)
16475 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16476 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16477 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16478 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16479 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16480 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16482 gcc_assert (!strict);
16483 return true;
16486 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16487 return true;
16489 /* ??? This is a lie. We do have moves between mmx/general, and for
16490 mmx/sse2. But by saying we need secondary memory we discourage the
16491 register allocator from using the mmx registers unless needed. */
16492 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16493 return true;
16495 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16497 /* SSE1 doesn't have any direct moves from other classes. */
16498 if (!TARGET_SSE2)
16499 return true;
16501 /* If the target says that inter-unit moves are more expensive
16502 than moving through memory, then don't generate them. */
16503 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16504 return true;
16506 /* Between SSE and general, we have moves no larger than word size. */
16507 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16508 return true;
16510 /* ??? For the cost of one register reformat penalty, we could use
16511 the same instructions to move SFmode and DFmode data, but the
16512 relevant move patterns don't support those alternatives. */
16513 if (mode == SFmode || mode == DFmode)
16514 return true;
16517 return false;
16520 /* Return true if the registers in CLASS cannot represent the change from
16521 modes FROM to TO. */
16523 bool
16524 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16525 enum reg_class class)
16527 if (from == to)
16528 return false;
16530 /* x87 registers can't do subreg at all, as all values are reformatted
16531 to extended precision. */
16532 if (MAYBE_FLOAT_CLASS_P (class))
16533 return true;
16535 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16537 /* Vector registers do not support QI or HImode loads. If we don't
16538 disallow a change to these modes, reload will assume it's ok to
16539 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16540 the vec_dupv4hi pattern. */
16541 if (GET_MODE_SIZE (from) < 4)
16542 return true;
16544 /* Vector registers do not support subreg with nonzero offsets, which
16545 are otherwise valid for integer registers. Since we can't see
16546 whether we have a nonzero offset from here, prohibit all
16547 nonparadoxical subregs changing size. */
16548 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16549 return true;
16552 return false;
16555 /* Return the cost of moving data from a register in class CLASS1 to
16556 one in class CLASS2.
16558 It is not required that the cost always equal 2 when FROM is the same as TO;
16559 on some machines it is expensive to move between registers if they are not
16560 general registers. */
16563 int ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16564 enum reg_class class2)
16566 /* If we require secondary memory, compute the cost of the store followed
16567 by the load. To avoid bad register allocation choices, we need
16568 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16570 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16572 int cost = 1;
16574 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16575 MEMORY_MOVE_COST (mode, class1, 1));
16576 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16577 MEMORY_MOVE_COST (mode, class2, 1));
16579 /* When copying from a general purpose register we may emit multiple
16580 stores followed by a single load, causing a memory size mismatch stall.
16581 Count this with an arbitrarily high cost of 20. */
16582 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16583 cost += 20;
16585 /* In the case of FP/MMX moves, the registers actually overlap, and we
16586 have to switch modes in order to treat them differently. */
16587 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16588 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16589 cost += 20;
16591 return cost;
16594 /* Moves between SSE/MMX and integer unit are expensive. */
16595 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16596 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16597 return ix86_cost->mmxsse_to_integer;
16598 if (MAYBE_FLOAT_CLASS_P (class1))
16599 return ix86_cost->fp_move;
16600 if (MAYBE_SSE_CLASS_P (class1))
16601 return ix86_cost->sse_move;
16602 if (MAYBE_MMX_CLASS_P (class1))
16603 return ix86_cost->mmx_move;
16604 return 2;
16607 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16609 bool
16610 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16613 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
16613 if (CC_REGNO_P (regno))
16614 return GET_MODE_CLASS (mode) == MODE_CC;
16615 if (GET_MODE_CLASS (mode) == MODE_CC
16616 || GET_MODE_CLASS (mode) == MODE_RANDOM
16617 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16618 return 0;
16619 if (FP_REGNO_P (regno))
16620 return VALID_FP_MODE_P (mode);
16621 if (SSE_REGNO_P (regno))
16623 /* We implement the move patterns for all vector modes into and
16624 out of SSE registers, even when no operation instructions
16625 are available. */
16626 return (VALID_SSE_REG_MODE (mode)
16627 || VALID_SSE2_REG_MODE (mode)
16628 || VALID_MMX_REG_MODE (mode)
16629 || VALID_MMX_REG_MODE_3DNOW (mode));
16631 if (MMX_REGNO_P (regno))
16633 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16634 so if the register is available at all, then we can move data of
16635 the given mode into or out of it. */
16636 return (VALID_MMX_REG_MODE (mode)
16637 || VALID_MMX_REG_MODE_3DNOW (mode));
16640 if (mode == QImode)
16643 /* Take care with QImode values - they can live in non-QI regs,
16644 but then they cause partial register stalls. */
16644 if (regno < 4 || TARGET_64BIT)
16645 return 1;
16646 if (!TARGET_PARTIAL_REG_STALL)
16647 return 1;
16648 return reload_in_progress || reload_completed;
16650 /* We handle both integers and floats in the general purpose registers. */
16651 else if (VALID_INT_MODE_P (mode))
16652 return 1;
16653 else if (VALID_FP_MODE_P (mode))
16654 return 1;
16655 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16656 on to use that value in smaller contexts, this can easily force a
16657 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16658 supporting DImode, allow it. */
16659 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16660 return 1;
16662 return 0;
16665 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16666 tieable integer mode. */
16668 static bool
16669 ix86_tieable_integer_mode_p (enum machine_mode mode)
16671 switch (mode)
16673 case HImode:
16674 case SImode:
16675 return true;
16677 case QImode:
16678 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16680 case DImode:
16681 return TARGET_64BIT;
16683 default:
16684 return false;
16688 /* Return true if MODE1 is accessible in a register that can hold MODE2
16689 without copying. That is, all register classes that can hold MODE2
16690 can also hold MODE1. */
16692 bool
16693 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16695 if (mode1 == mode2)
16696 return true;
16698 if (ix86_tieable_integer_mode_p (mode1)
16699 && ix86_tieable_integer_mode_p (mode2))
16700 return true;
16702 /* MODE2 being XFmode implies fp stack or general regs, which means we
16703 can tie any smaller floating point modes to it. Note that we do not
16704 tie this with TFmode. */
16705 if (mode2 == XFmode)
16706 return mode1 == SFmode || mode1 == DFmode;
16708 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16709 that we can tie it with SFmode. */
16710 if (mode2 == DFmode)
16711 return mode1 == SFmode;
16713 /* If MODE2 is only appropriate for an SSE register, then tie with
16714 any other mode acceptable to SSE registers. */
16715 if (GET_MODE_SIZE (mode2) >= 8
16716 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16717 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16719 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16720 with any other mode acceptable to MMX registers. */
16721 if (GET_MODE_SIZE (mode2) == 8
16722 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16723 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16725 return false;
16728 /* Return the cost of moving data of mode M between a
16729 register and memory. A value of 2 is the default; this cost is
16730 relative to those in `REGISTER_MOVE_COST'.
16732 If moving between registers and memory is more expensive than
16733 between two registers, you should define this macro to express the
16734 relative cost.
16736 Also model the increased cost of moving QImode registers in
16737 non-Q_REGS classes.  */
16740 int ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16742 if (FLOAT_CLASS_P (class))
16744 int index;
16745 switch (mode)
16747 case SFmode:
16748 index = 0;
16749 break;
16750 case DFmode:
16751 index = 1;
16752 break;
16753 case XFmode:
16754 index = 2;
16755 break;
16756 default:
16757 return 100;
16759 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16761 if (SSE_CLASS_P (class))
16763 int index;
16764 switch (GET_MODE_SIZE (mode))
16766 case 4:
16767 index = 0;
16768 break;
16769 case 8:
16770 index = 1;
16771 break;
16772 case 16:
16773 index = 2;
16774 break;
16775 default:
16776 return 100;
16778 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16780 if (MMX_CLASS_P (class))
16782 int index;
16783 switch (GET_MODE_SIZE (mode))
16785 case 4:
16786 index = 0;
16787 break;
16788 case 8:
16789 index = 1;
16790 break;
16791 default:
16792 return 100;
16794 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16796 switch (GET_MODE_SIZE (mode))
16798 case 1:
16799 if (in)
16800 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16801 : ix86_cost->movzbl_load);
16802 else
16803 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16804 : ix86_cost->int_store[0] + 4);
16805 break;
16806 case 2:
16807 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16808 default:
16809 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16810 if (mode == TFmode)
16811 mode = XFmode;
16812 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16813 * (((int) GET_MODE_SIZE (mode)
16814 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16818 /* Compute a (partial) cost for rtx X. Return true if the complete
16819 cost has been computed, and false if subexpressions should be
16820 scanned. In either case, *TOTAL contains the cost result. */
16822 static bool
16823 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16825 enum machine_mode mode = GET_MODE (x);
16827 switch (code)
16829 case CONST_INT:
16830 case CONST:
16831 case LABEL_REF:
16832 case SYMBOL_REF:
16833 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16834 *total = 3;
16835 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16836 *total = 2;
16837 else if (flag_pic && SYMBOLIC_CONST (x)
16838 && (!TARGET_64BIT
16839 || (GET_CODE (x) != LABEL_REF
16840 && (GET_CODE (x) != SYMBOL_REF
16841 || !SYMBOL_REF_LOCAL_P (x)))))
16842 *total = 1;
16843 else
16844 *total = 0;
16845 return true;
16847 case CONST_DOUBLE:
16848 if (mode == VOIDmode)
16849 *total = 0;
16850 else
16851 switch (standard_80387_constant_p (x))
16853 case 1: /* 0.0 */
16854 *total = 1;
16855 break;
16856 default: /* Other constants */
16857 *total = 2;
16858 break;
16859 case 0:
16860 case -1:
16861 /* Start with (MEM (SYMBOL_REF)), since that's where
16862 it'll probably end up. Add a penalty for size. */
16863 *total = (COSTS_N_INSNS (1)
16864 + (flag_pic != 0 && !TARGET_64BIT)
16865 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16866 break;
16868 return true;
16870 case ZERO_EXTEND:
16871 /* The zero extension is often completely free on x86_64, so make
16872 it as cheap as possible. */
16873 if (TARGET_64BIT && mode == DImode
16874 && GET_MODE (XEXP (x, 0)) == SImode)
16875 *total = 1;
16876 else if (TARGET_ZERO_EXTEND_WITH_AND)
16877 *total = ix86_cost->add;
16878 else
16879 *total = ix86_cost->movzx;
16880 return false;
16882 case SIGN_EXTEND:
16883 *total = ix86_cost->movsx;
16884 return false;
16886 case ASHIFT:
16887 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16888 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16890 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16891 if (value == 1)
16893 *total = ix86_cost->add;
16894 return false;
16896 if ((value == 2 || value == 3)
16897 && ix86_cost->lea <= ix86_cost->shift_const)
16899 *total = ix86_cost->lea;
16900 return false;
16903 /* FALLTHRU */
16905 case ROTATE:
16906 case ASHIFTRT:
16907 case LSHIFTRT:
16908 case ROTATERT:
16909 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16911 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16913 if (INTVAL (XEXP (x, 1)) > 32)
16914 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
16915 else
16916 *total = ix86_cost->shift_const * 2;
16918 else
16920 if (GET_CODE (XEXP (x, 1)) == AND)
16921 *total = ix86_cost->shift_var * 2;
16922 else
16923 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
16926 else
16928 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16929 *total = ix86_cost->shift_const;
16930 else
16931 *total = ix86_cost->shift_var;
16933 return false;
16935 case MULT:
16936 if (FLOAT_MODE_P (mode))
16938 *total = ix86_cost->fmul;
16939 return false;
16941 else
16943 rtx op0 = XEXP (x, 0);
16944 rtx op1 = XEXP (x, 1);
16945 int nbits;
16946 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16948 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16949 for (nbits = 0; value != 0; value &= value - 1)
16950 nbits++;
16952 else
16953 /* This is arbitrary. */
16954 nbits = 7;
16956 /* Compute costs correctly for widening multiplication. */
16957 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16958 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16959 == GET_MODE_SIZE (mode))
16961 int is_mulwiden = 0;
16962 enum machine_mode inner_mode = GET_MODE (op0);
16964 if (GET_CODE (op0) == GET_CODE (op1))
16965 is_mulwiden = 1, op1 = XEXP (op1, 0);
16966 else if (GET_CODE (op1) == CONST_INT)
16968 if (GET_CODE (op0) == SIGN_EXTEND)
16969 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16970 == INTVAL (op1);
16971 else
16972 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16975 if (is_mulwiden)
16976 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
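/* A genuinely widening multiply is costed at the narrower inner mode;
   e.g. a DImode product of two sign-extended SImode values is charged
   as an SImode multiply.  */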
16979 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16980 + nbits * ix86_cost->mult_bit
16981 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
16983 return true;
16986 case DIV:
16987 case UDIV:
16988 case MOD:
16989 case UMOD:
16990 if (FLOAT_MODE_P (mode))
16991 *total = ix86_cost->fdiv;
16992 else
16993 *total = ix86_cost->divide[MODE_INDEX (mode)];
16994 return false;
16996 case PLUS:
16997 if (FLOAT_MODE_P (mode))
16998 *total = ix86_cost->fadd;
16999 else if (GET_MODE_CLASS (mode) == MODE_INT
17000 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17002 if (GET_CODE (XEXP (x, 0)) == PLUS
17003 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17004 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17005 && CONSTANT_P (XEXP (x, 1)))
17007 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17008 if (val == 2 || val == 4 || val == 8)
17010 *total = ix86_cost->lea;
17011 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17012 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17013 outer_code);
17014 *total += rtx_cost (XEXP (x, 1), outer_code);
17015 return true;
17018 else if (GET_CODE (XEXP (x, 0)) == MULT
17019 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17021 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17022 if (val == 2 || val == 4 || val == 8)
17024 *total = ix86_cost->lea;
17025 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17026 *total += rtx_cost (XEXP (x, 1), outer_code);
17027 return true;
17030 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17032 *total = ix86_cost->lea;
17033 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17034 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17035 *total += rtx_cost (XEXP (x, 1), outer_code);
17036 return true;
17039 /* FALLTHRU */
17041 case MINUS:
17042 if (FLOAT_MODE_P (mode))
17044 *total = ix86_cost->fadd;
17045 return false;
17047 /* FALLTHRU */
17049 case AND:
17050 case IOR:
17051 case XOR:
17052 if (!TARGET_64BIT && mode == DImode)
17054 *total = (ix86_cost->add * 2
17055 + (rtx_cost (XEXP (x, 0), outer_code)
17056 << (GET_MODE (XEXP (x, 0)) != DImode))
17057 + (rtx_cost (XEXP (x, 1), outer_code)
17058 << (GET_MODE (XEXP (x, 1)) != DImode)));
17059 return true;
17061 /* FALLTHRU */
17063 case NEG:
17064 if (FLOAT_MODE_P (mode))
17066 *total = ix86_cost->fchs;
17067 return false;
17069 /* FALLTHRU */
17071 case NOT:
17072 if (!TARGET_64BIT && mode == DImode)
17073 *total = ix86_cost->add * 2;
17074 else
17075 *total = ix86_cost->add;
17076 return false;
17078 case COMPARE:
17079 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17080 && XEXP (XEXP (x, 0), 1) == const1_rtx
17081 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17082 && XEXP (x, 1) == const0_rtx)
17084 /* This kind of construct is implemented using test[bwl].
17085 Treat it as if we had an AND. */
17086 *total = (ix86_cost->add
17087 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17088 + rtx_cost (const1_rtx, outer_code));
17089 return true;
17091 return false;
17093 case FLOAT_EXTEND:
17094 if (!TARGET_SSE_MATH
17095 || mode == XFmode
17096 || (mode == DFmode && !TARGET_SSE2))
17097 *total = 0;
17098 return false;
17100 case ABS:
17101 if (FLOAT_MODE_P (mode))
17102 *total = ix86_cost->fabs;
17103 return false;
17105 case SQRT:
17106 if (FLOAT_MODE_P (mode))
17107 *total = ix86_cost->fsqrt;
17108 return false;
17110 case UNSPEC:
17111 if (XINT (x, 1) == UNSPEC_TP)
17112 *total = 0;
17113 return false;
17115 default:
17116 return false;
17120 #if TARGET_MACHO
17122 static int current_machopic_label_num;
17124 /* Given a symbol name and its associated stub, write out the
17125 definition of the stub. */
17127 void
17128 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17130 unsigned int length;
17131 char *binder_name, *symbol_name, lazy_ptr_name[32];
17132 int label = ++current_machopic_label_num;
17134 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17135 symb = (*targetm.strip_name_encoding) (symb);
17137 length = strlen (stub);
17138 binder_name = alloca (length + 32);
17139 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17141 length = strlen (symb);
17142 symbol_name = alloca (length + 32);
17143 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17145 sprintf (lazy_ptr_name, "L%d$lz", label);
17147 if (MACHOPIC_PURE)
17148 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17149 else
17150 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17152 fprintf (file, "%s:\n", stub);
17153 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17155 if (MACHOPIC_PURE)
17157 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17158 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17159 fprintf (file, "\tjmp\t*%%edx\n");
17161 else
17162 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17164 fprintf (file, "%s:\n", binder_name);
17166 if (MACHOPIC_PURE)
17168 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17169 fprintf (file, "\tpushl\t%%eax\n");
17171 else
17172 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17174 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17176 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17177 fprintf (file, "%s:\n", lazy_ptr_name);
17178 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17179 fprintf (file, "\t.long %s\n", binder_name);
17182 void
17183 darwin_x86_file_end (void)
17185 darwin_file_end ();
17186 ix86_file_end ();
17188 #endif /* TARGET_MACHO */
17190 /* Order the registers for register allocator. */
17192 void
17193 x86_order_regs_for_local_alloc (void)
17195 int pos = 0;
17196 int i;
17198 /* First allocate the local general purpose registers. */
17199 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17200 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17201 reg_alloc_order [pos++] = i;
17203 /* Global general purpose registers. */
17204 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17205 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17206 reg_alloc_order [pos++] = i;
17208 /* x87 registers come first in case we are doing FP math
17209 using them. */
17210 if (!TARGET_SSE_MATH)
17211 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17212 reg_alloc_order [pos++] = i;
17214 /* SSE registers. */
17215 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17216 reg_alloc_order [pos++] = i;
17217 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17218 reg_alloc_order [pos++] = i;
17220 /* x87 registers. */
17221 if (TARGET_SSE_MATH)
17222 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17223 reg_alloc_order [pos++] = i;
17225 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17226 reg_alloc_order [pos++] = i;
17228 /* Initialize the rest of the array, as we do not allocate some registers
17229 at all. */
17230 while (pos < FIRST_PSEUDO_REGISTER)
17231 reg_alloc_order [pos++] = 0;
17234 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17235 struct attribute_spec.handler. */
17236 static tree
17237 ix86_handle_struct_attribute (tree *node, tree name,
17238 tree args ATTRIBUTE_UNUSED,
17239 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17241 tree *type = NULL;
17242 if (DECL_P (*node))
17244 if (TREE_CODE (*node) == TYPE_DECL)
17245 type = &TREE_TYPE (*node);
17247 else
17248 type = node;
17250 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17251 || TREE_CODE (*type) == UNION_TYPE)))
17253 warning (OPT_Wattributes, "%qs attribute ignored",
17254 IDENTIFIER_POINTER (name));
17255 *no_add_attrs = true;
17258 else if ((is_attribute_p ("ms_struct", name)
17259 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17260 || ((is_attribute_p ("gcc_struct", name)
17261 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17263 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17264 IDENTIFIER_POINTER (name));
17265 *no_add_attrs = true;
17268 return NULL_TREE;
17271 static bool
17272 ix86_ms_bitfield_layout_p (tree record_type)
17274 return (TARGET_MS_BITFIELD_LAYOUT &&
17275 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17276 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17279 /* Returns an expression indicating where the this parameter is
17280 located on entry to the FUNCTION. */
17282 static rtx
17283 x86_this_parameter (tree function)
17285 tree type = TREE_TYPE (function);
17287 if (TARGET_64BIT)
17289 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17290 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17293 if (ix86_function_regparm (type, function) > 0)
17295 tree parm;
17297 parm = TYPE_ARG_TYPES (type);
17298 /* Figure out whether or not the function has a variable number of
17299 arguments. */
17300 for (; parm; parm = TREE_CHAIN (parm))
17301 if (TREE_VALUE (parm) == void_type_node)
17302 break;
17303 /* If not, the this parameter is in the first argument. */
17304 if (parm)
17306 int regno = 0;
17307 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17308 regno = 2;
17309 return gen_rtx_REG (SImode, regno);
17313 if (aggregate_value_p (TREE_TYPE (type), type))
17314 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17315 else
17316 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17319 /* Determine whether x86_output_mi_thunk can succeed. */
17321 static bool
17322 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17323 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17324 HOST_WIDE_INT vcall_offset, tree function)
17326 /* 64-bit can handle anything. */
17327 if (TARGET_64BIT)
17328 return true;
17330 /* For 32-bit, everything's fine if we have one free register. */
17331 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17332 return true;
17334 /* Need a free register for vcall_offset. */
17335 if (vcall_offset)
17336 return false;
17338 /* Need a free register for GOT references. */
17339 if (flag_pic && !(*targetm.binds_local_p) (function))
17340 return false;
17342 /* Otherwise ok. */
17343 return true;
17346 /* Output the assembler code for a thunk function. THUNK_DECL is the
17347 declaration for the thunk function itself, FUNCTION is the decl for
17348 the target function. DELTA is an immediate constant offset to be
17349 added to THIS. If VCALL_OFFSET is nonzero, the word at
17350 *(*this + vcall_offset) should be added to THIS. */
17352 static void
17353 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17354 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17355 HOST_WIDE_INT vcall_offset, tree function)
17357 rtx xops[3];
17358 rtx this = x86_this_parameter (function);
17359 rtx this_reg, tmp;
17361 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17362 pull it in now and let DELTA benefit. */
17363 if (REG_P (this))
17364 this_reg = this;
17365 else if (vcall_offset)
17367 /* Put the this parameter into %eax. */
17368 xops[0] = this;
17369 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17370 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17372 else
17373 this_reg = NULL_RTX;
17375 /* Adjust the this parameter by a fixed constant. */
17376 if (delta)
17378 xops[0] = GEN_INT (delta);
17379 xops[1] = this_reg ? this_reg : this;
17380 if (TARGET_64BIT)
17382 if (!x86_64_general_operand (xops[0], DImode))
17384 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17385 xops[1] = tmp;
17386 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17387 xops[0] = tmp;
17388 xops[1] = this;
17390 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17392 else
17393 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17396 /* Adjust the this parameter by a value stored in the vtable. */
17397 if (vcall_offset)
17399 if (TARGET_64BIT)
17400 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17401 else
17403 int tmp_regno = 2 /* ECX */;
17404 if (lookup_attribute ("fastcall",
17405 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17406 tmp_regno = 0 /* EAX */;
17407 tmp = gen_rtx_REG (SImode, tmp_regno);
17410 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17411 xops[1] = tmp;
17412 if (TARGET_64BIT)
17413 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17414 else
17415 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17417 /* Adjust the this parameter. */
17418 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17419 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17421 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17422 xops[0] = GEN_INT (vcall_offset);
17423 xops[1] = tmp2;
17424 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17425 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17427 xops[1] = this_reg;
17428 if (TARGET_64BIT)
17429 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17430 else
17431 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17434 /* If necessary, drop THIS back to its stack slot. */
17435 if (this_reg && this_reg != this)
17437 xops[0] = this_reg;
17438 xops[1] = this;
17439 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17442 xops[0] = XEXP (DECL_RTL (function), 0);
17443 if (TARGET_64BIT)
17445 if (!flag_pic || (*targetm.binds_local_p) (function))
17446 output_asm_insn ("jmp\t%P0", xops);
17447 else
17449 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17450 tmp = gen_rtx_CONST (Pmode, tmp);
17451 tmp = gen_rtx_MEM (QImode, tmp);
17452 xops[0] = tmp;
17453 output_asm_insn ("jmp\t%A0", xops);
17456 else
17458 if (!flag_pic || (*targetm.binds_local_p) (function))
17459 output_asm_insn ("jmp\t%P0", xops);
17460 else
17461 #if TARGET_MACHO
17462 if (TARGET_MACHO)
17464 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17465 tmp = (gen_rtx_SYMBOL_REF
17466 (Pmode,
17467 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17468 tmp = gen_rtx_MEM (QImode, tmp);
17469 xops[0] = tmp;
17470 output_asm_insn ("jmp\t%0", xops);
17472 else
17473 #endif /* TARGET_MACHO */
17475 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17476 output_set_got (tmp, NULL_RTX);
17478 xops[1] = tmp;
17479 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17480 output_asm_insn ("jmp\t{*}%1", xops);
17485 static void
17486 x86_file_start (void)
17488 default_file_start ();
17489 #if TARGET_MACHO
17490 darwin_file_start ();
17491 #endif
17492 if (X86_FILE_START_VERSION_DIRECTIVE)
17493 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17494 if (X86_FILE_START_FLTUSED)
17495 fputs ("\t.global\t__fltused\n", asm_out_file);
17496 if (ix86_asm_dialect == ASM_INTEL)
17497 fputs ("\t.intel_syntax\n", asm_out_file);
17501 int x86_field_alignment (tree field, int computed)
17503 enum machine_mode mode;
17504 tree type = TREE_TYPE (field);
17506 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17507 return computed;
17508 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17509 ? get_inner_array_type (type) : type);
17510 if (mode == DFmode || mode == DCmode
17511 || GET_MODE_CLASS (mode) == MODE_INT
17512 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17513 return MIN (32, computed);
17514 return computed;
17517 /* Output assembler code to FILE to increment profiler label # LABELNO
17518 for profiling a function entry. */
17519 void
17520 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17522 if (TARGET_64BIT)
17523 if (flag_pic)
17525 #ifndef NO_PROFILE_COUNTERS
17526 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17527 #endif
17528 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17530 else
17532 #ifndef NO_PROFILE_COUNTERS
17533 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17534 #endif
17535 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17537 else if (flag_pic)
17539 #ifndef NO_PROFILE_COUNTERS
17540 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17541 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17542 #endif
17543 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17545 else
17547 #ifndef NO_PROFILE_COUNTERS
17548 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17549 PROFILE_COUNT_REGISTER);
17550 #endif
17551 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17555 /* We don't have exact information about the insn sizes, but we may assume
17556 quite safely that we are informed about all 1 byte insns and memory
17557 address sizes. This is enough to eliminate unnecessary padding in
17558 99% of cases. */
17560 static int
17561 min_insn_size (rtx insn)
17563 int l = 0;
17565 if (!INSN_P (insn) || !active_insn_p (insn))
17566 return 0;
17568 /* Discard the alignments we've emitted and jump table bodies. */
17569 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17570 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17571 return 0;
17572 if (GET_CODE (insn) == JUMP_INSN
17573 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17574 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17575 return 0;
17577 /* Important case - calls are always 5 bytes.
17578 It is common to have many calls in a row. */
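/* A direct call is a 1 byte opcode followed by a 4 byte rel32 displacement,
   hence the 5 bytes.  */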
17579 if (GET_CODE (insn) == CALL_INSN
17580 && symbolic_reference_mentioned_p (PATTERN (insn))
17581 && !SIBLING_CALL_P (insn))
17582 return 5;
17583 if (get_attr_length (insn) <= 1)
17584 return 1;
17586 /* For normal instructions we may rely on the sizes of addresses
17587 and on the presence of a symbol to require 4 bytes of encoding.
17588 This is not the case for jumps, where references are PC relative. */
17589 if (GET_CODE (insn) != JUMP_INSN)
17591 l = get_attr_length_address (insn);
17592 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17593 l = 4;
17595 if (l)
17596 return 1+l;
17597 else
17598 return 2;
17601 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17602 16 byte window. */
17604 static void
17605 ix86_avoid_jump_misspredicts (void)
17607 rtx insn, start = get_insns ();
17608 int nbytes = 0, njumps = 0;
17609 int isjump = 0;
17611 /* Look for all minimal intervals of instructions containing 4 jumps.
17612 The intervals are bounded by START and INSN. NBYTES is the total
17613 size of instructions in the interval, including INSN and not including
17614 START. When NBYTES is smaller than 16 bytes, it is possible
17615 that START and INSN end up in the same 16 byte window.
17617 The smallest offset in the window at which INSN can start is the case where
17618 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
17619 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN). */
17621 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17624 nbytes += min_insn_size (insn);
17625 if (dump_file)
17626 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17627 INSN_UID (insn), min_insn_size (insn));
17628 if ((GET_CODE (insn) == JUMP_INSN
17629 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17630 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17631 || GET_CODE (insn) == CALL_INSN)
17632 njumps++;
17633 else
17634 continue;
17636 while (njumps > 3)
17638 start = NEXT_INSN (start);
17639 if ((GET_CODE (start) == JUMP_INSN
17640 && GET_CODE (PATTERN (start)) != ADDR_VEC
17641 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17642 || GET_CODE (start) == CALL_INSN)
17643 njumps--, isjump = 1;
17644 else
17645 isjump = 0;
17646 nbytes -= min_insn_size (start);
17648 gcc_assert (njumps >= 0);
17649 if (dump_file)
17650 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17651 INSN_UID (start), INSN_UID (insn), nbytes);
17653 if (njumps == 3 && isjump && nbytes < 16)
17655 int padsize = 15 - nbytes + min_insn_size (insn);
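/* The p2align pads just enough that INSN is pushed out of the 16 byte
   window containing START when the two would otherwise share it.  */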
17657 if (dump_file)
17658 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17659 INSN_UID (insn), padsize);
17660 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17665 /* AMD Athlon works faster
17666 when RET is not the destination of a conditional jump or directly preceded
17667 by another jump instruction. We avoid the penalty by inserting a NOP just
17668 before the RET instruction in such cases. */
17669 static void
17670 ix86_pad_returns (void)
17672 edge e;
17673 edge_iterator ei;
17675 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17677 basic_block bb = e->src;
17678 rtx ret = BB_END (bb);
17679 rtx prev;
17680 bool replace = false;
17682 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17683 || !maybe_hot_bb_p (bb))
17684 continue;
17685 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17686 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17687 break;
17688 if (prev && GET_CODE (prev) == CODE_LABEL)
17690 edge e;
17691 edge_iterator ei;
17693 FOR_EACH_EDGE (e, ei, bb->preds)
17694 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17695 && !(e->flags & EDGE_FALLTHRU))
17696 replace = true;
17698 if (!replace)
17700 prev = prev_active_insn (ret);
17701 if (prev
17702 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17703 || GET_CODE (prev) == CALL_INSN))
17704 replace = true;
17705 /* Empty functions get a branch mispredict even when the jump destination
17706 is not visible to us. */
17707 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17708 replace = true;
17710 if (replace)
17712 emit_insn_before (gen_return_internal_long (), ret);
17713 delete_insn (ret);
17718 /* Implement machine specific optimizations. We implement padding of returns
17719 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
17720 static void
17721 ix86_reorg (void)
17723 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17724 ix86_pad_returns ();
17725 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17726 ix86_avoid_jump_misspredicts ();
17729 /* Return nonzero when a QImode register that must be represented via a REX
17730 prefix is used. */
17731 bool
17732 x86_extended_QIreg_mentioned_p (rtx insn)
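/* Hard registers 0-3 (%al, %dl, %cl, %bl) are the only QImode registers
   encodable without a REX prefix; any higher-numbered register used in
   QImode (SIL, DIL, BPL, SPL or R8B-R15B) forces a REX prefix.  */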
17734 int i;
17735 extract_insn_cached (insn);
17736 for (i = 0; i < recog_data.n_operands; i++)
17737 if (REG_P (recog_data.operand[i])
17738 && REGNO (recog_data.operand[i]) >= 4)
17739 return true;
17740 return false;
17743 /* Return nonzero when P points to register encoded via REX prefix.
17744 Called via for_each_rtx. */
17745 static int
17746 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17748 unsigned int regno;
17749 if (!REG_P (*p))
17750 return 0;
17751 regno = REGNO (*p);
17752 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17755 /* Return true when INSN mentions register that must be encoded using REX
17756 prefix. */
17757 bool
17758 x86_extended_reg_mentioned_p (rtx insn)
17760 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17763 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17764 optabs would emit if we didn't have TFmode patterns. */
17766 void
17767 x86_emit_floatuns (rtx operands[2])
17769 rtx neglab, donelab, i0, i1, f0, in, out;
17770 enum machine_mode mode, inmode;
17772 inmode = GET_MODE (operands[1]);
17773 gcc_assert (inmode == SImode || inmode == DImode);
17775 out = operands[0];
17776 in = force_reg (inmode, operands[1]);
17777 mode = GET_MODE (out);
17778 neglab = gen_label_rtx ();
17779 donelab = gen_label_rtx ();
17780 i1 = gen_reg_rtx (Pmode);
17781 f0 = gen_reg_rtx (mode);
17783 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17785 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17786 emit_jump_insn (gen_jump (donelab));
17787 emit_barrier ();
17789 emit_label (neglab);
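/* IN has its top bit set, so converting it as a signed value would come out
   negative.  Convert (IN >> 1) | (IN & 1) instead - halving while folding
   the low bit back in so rounding is preserved - and then double the result.  */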
17791 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17792 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17793 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17794 expand_float (f0, i0, 0);
17795 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17797 emit_label (donelab);
17800 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17801 with all elements equal to VAR. Return true if successful. */
17803 static bool
17804 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17805 rtx target, rtx val)
17807 enum machine_mode smode, wsmode, wvmode;
17808 rtx x;
17810 switch (mode)
17812 case V2SImode:
17813 case V2SFmode:
17814 if (!mmx_ok && !TARGET_SSE)
17815 return false;
17816 /* FALLTHRU */
17818 case V2DFmode:
17819 case V2DImode:
17820 case V4SFmode:
17821 case V4SImode:
17822 val = force_reg (GET_MODE_INNER (mode), val);
17823 x = gen_rtx_VEC_DUPLICATE (mode, val);
17824 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17825 return true;
17827 case V4HImode:
17828 if (!mmx_ok)
17829 return false;
17830 if (TARGET_SSE || TARGET_3DNOW_A)
17832 val = gen_lowpart (SImode, val);
17833 x = gen_rtx_TRUNCATE (HImode, val);
17834 x = gen_rtx_VEC_DUPLICATE (mode, x);
17835 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17836 return true;
17838 else
17840 smode = HImode;
17841 wsmode = SImode;
17842 wvmode = V2SImode;
17843 goto widen;
17846 case V8QImode:
17847 if (!mmx_ok)
17848 return false;
17849 smode = QImode;
17850 wsmode = HImode;
17851 wvmode = V4HImode;
17852 goto widen;
17853 case V8HImode:
17854 smode = HImode;
17855 wsmode = SImode;
17856 wvmode = V4SImode;
17857 goto widen;
17858 case V16QImode:
17859 smode = QImode;
17860 wsmode = HImode;
17861 wvmode = V8HImode;
17862 goto widen;
17863 widen:
17864 /* Replicate the value once into the next wider mode and recurse. */
17865 val = convert_modes (wsmode, smode, val, true);
17866 x = expand_simple_binop (wsmode, ASHIFT, val,
17867 GEN_INT (GET_MODE_BITSIZE (smode)),
17868 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17869 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
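/* For example, V16QImode reaches this point with smode == QImode and
   wsmode == HImode: the QImode value v becomes (v << 8) | v, and the
   recursive call builds a V8HImode duplicate that is then reinterpreted
   as V16QImode.  */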
17871 x = gen_reg_rtx (wvmode);
17872 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17873 gcc_unreachable ();
17874 emit_move_insn (target, gen_lowpart (mode, x));
17875 return true;
17877 default:
17878 return false;
17882 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17883 whose low element is VAR, and other elements are zero. Return true
17884 if successful. */
17886 static bool
17887 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17888 rtx target, rtx var)
17890 enum machine_mode vsimode;
17891 rtx x;
17893 switch (mode)
17895 case V2SFmode:
17896 case V2SImode:
17897 if (!mmx_ok && !TARGET_SSE)
17898 return false;
17899 /* FALLTHRU */
17901 case V2DFmode:
17902 case V2DImode:
17903 var = force_reg (GET_MODE_INNER (mode), var);
17904 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17905 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17906 return true;
17908 case V4SFmode:
17909 case V4SImode:
17910 var = force_reg (GET_MODE_INNER (mode), var);
17911 x = gen_rtx_VEC_DUPLICATE (mode, var);
17912 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17913 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17914 return true;
17916 case V8HImode:
17917 case V16QImode:
17918 vsimode = V4SImode;
17919 goto widen;
17920 case V4HImode:
17921 case V8QImode:
17922 if (!mmx_ok)
17923 return false;
17924 vsimode = V2SImode;
17925 goto widen;
17926 widen:
17927 /* Zero extend the variable element to SImode and recurse. */
17928 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17930 x = gen_reg_rtx (vsimode);
17931 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17932 gcc_unreachable ();
17934 emit_move_insn (target, gen_lowpart (mode, x));
17935 return true;
17937 default:
17938 return false;
17942 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17943 consisting of the values in VALS. It is known that all elements
17944 except ONE_VAR are constants. Return true if successful. */
17946 static bool
17947 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17948 rtx target, rtx vals, int one_var)
17950 rtx var = XVECEXP (vals, 0, one_var);
17951 enum machine_mode wmode;
17952 rtx const_vec, x;
17954 const_vec = copy_rtx (vals);
17955 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17956 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17958 switch (mode)
17960 case V2DFmode:
17961 case V2DImode:
17962 case V2SFmode:
17963 case V2SImode:
17964 /* For the two element vectors, it's just as easy to use
17965 the general case. */
17966 return false;
17968 case V4SFmode:
17969 case V4SImode:
17970 case V8HImode:
17971 case V4HImode:
17972 break;
17974 case V16QImode:
17975 wmode = V8HImode;
17976 goto widen;
17977 case V8QImode:
17978 wmode = V4HImode;
17979 goto widen;
17980 widen:
17981 /* There's no way to set one QImode entry easily. Combine
17982 the variable value with its adjacent constant value, and
17983 promote to an HImode set. */
17984 x = XVECEXP (vals, 0, one_var ^ 1);
17985 if (one_var & 1)
17987 var = convert_modes (HImode, QImode, var, true);
17988 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17989 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17990 x = GEN_INT (INTVAL (x) & 0xff);
17992 else
17994 var = convert_modes (HImode, QImode, var, true);
17995 x = gen_int_mode (INTVAL (x) << 8, HImode);
17997 if (x != const0_rtx)
17998 var = expand_simple_binop (HImode, IOR, var, x, var,
17999 1, OPTAB_LIB_WIDEN);
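/* VAR now packs the variable byte together with its constant neighbour
   into a single HImode value; load the constant vector in the wider mode
   and overwrite the corresponding HImode element with it.  */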
18001 x = gen_reg_rtx (wmode);
18002 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18003 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18005 emit_move_insn (target, gen_lowpart (mode, x));
18006 return true;
18008 default:
18009 return false;
18012 emit_move_insn (target, const_vec);
18013 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18014 return true;
18017 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18018 all values variable, and none identical. */
18020 static void
18021 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18022 rtx target, rtx vals)
18024 enum machine_mode half_mode = GET_MODE_INNER (mode);
18025 rtx op0 = NULL, op1 = NULL;
18026 bool use_vec_concat = false;
18028 switch (mode)
18030 case V2SFmode:
18031 case V2SImode:
18032 if (!mmx_ok && !TARGET_SSE)
18033 break;
18034 /* FALLTHRU */
18036 case V2DFmode:
18037 case V2DImode:
18038 /* For the two element vectors, we always implement VEC_CONCAT. */
18039 op0 = XVECEXP (vals, 0, 0);
18040 op1 = XVECEXP (vals, 0, 1);
18041 use_vec_concat = true;
18042 break;
18044 case V4SFmode:
18045 half_mode = V2SFmode;
18046 goto half;
18047 case V4SImode:
18048 half_mode = V2SImode;
18049 goto half;
18050 half:
18052 rtvec v;
18054 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18055 Recurse to load the two halves. */
18057 op0 = gen_reg_rtx (half_mode);
18058 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18059 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18061 op1 = gen_reg_rtx (half_mode);
18062 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18063 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18065 use_vec_concat = true;
18067 break;
18069 case V8HImode:
18070 case V16QImode:
18071 case V4HImode:
18072 case V8QImode:
18073 break;
18075 default:
18076 gcc_unreachable ();
18079 if (use_vec_concat)
18081 if (!register_operand (op0, half_mode))
18082 op0 = force_reg (half_mode, op0);
18083 if (!register_operand (op1, half_mode))
18084 op1 = force_reg (half_mode, op1);
18086 emit_insn (gen_rtx_SET (VOIDmode, target,
18087 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18089 else
18091 int i, j, n_elts, n_words, n_elt_per_word;
18092 enum machine_mode inner_mode;
18093 rtx words[4], shift;
18095 inner_mode = GET_MODE_INNER (mode);
18096 n_elts = GET_MODE_NUNITS (mode);
18097 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18098 n_elt_per_word = n_elts / n_words;
18099 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
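/* Assemble each word in an integer register, starting from the
   highest-numbered element of the group so that the group's first element
   ends up in the least significant bits.  */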
18101 for (i = 0; i < n_words; ++i)
18103 rtx word = NULL_RTX;
18105 for (j = 0; j < n_elt_per_word; ++j)
18107 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18108 elt = convert_modes (word_mode, inner_mode, elt, true);
18110 if (j == 0)
18111 word = elt;
18112 else
18114 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18115 word, 1, OPTAB_LIB_WIDEN);
18116 word = expand_simple_binop (word_mode, IOR, word, elt,
18117 word, 1, OPTAB_LIB_WIDEN);
18121 words[i] = word;
18124 if (n_words == 1)
18125 emit_move_insn (target, gen_lowpart (mode, words[0]));
18126 else if (n_words == 2)
18128 rtx tmp = gen_reg_rtx (mode);
18129 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18130 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18131 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18132 emit_move_insn (target, tmp);
18134 else if (n_words == 4)
18136 rtx tmp = gen_reg_rtx (V4SImode);
18137 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18138 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18139 emit_move_insn (target, gen_lowpart (mode, tmp));
18141 else
18142 gcc_unreachable ();
18146 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18147 instructions unless MMX_OK is true. */
18149 void
18150 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18152 enum machine_mode mode = GET_MODE (target);
18153 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18154 int n_elts = GET_MODE_NUNITS (mode);
18155 int n_var = 0, one_var = -1;
18156 bool all_same = true, all_const_zero = true;
18157 int i;
18158 rtx x;
18160 for (i = 0; i < n_elts; ++i)
18162 x = XVECEXP (vals, 0, i);
18163 if (!CONSTANT_P (x))
18164 n_var++, one_var = i;
18165 else if (x != CONST0_RTX (inner_mode))
18166 all_const_zero = false;
18167 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18168 all_same = false;
18171 /* Constants are best loaded from the constant pool. */
18172 if (n_var == 0)
18174 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18175 return;
18178 /* If all values are identical, broadcast the value. */
18179 if (all_same
18180 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18181 XVECEXP (vals, 0, 0)))
18182 return;
18184 /* Values where only one field is non-constant are best loaded from
18185 the pool and overwritten via move later. */
18186 if (n_var == 1)
18188 if (all_const_zero && one_var == 0
18189 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
18190 XVECEXP (vals, 0, 0)))
18191 return;
18193 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18194 return;
18197 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18200 void
18201 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18203 enum machine_mode mode = GET_MODE (target);
18204 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18205 bool use_vec_merge = false;
18206 rtx tmp;
18208 switch (mode)
18210 case V2SFmode:
18211 case V2SImode:
18212 if (mmx_ok)
18214 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18215 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18216 if (elt == 0)
18217 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18218 else
18219 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18220 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18221 return;
18223 break;
18225 case V2DFmode:
18226 case V2DImode:
18228 rtx op0, op1;
18230 /* For two-element vectors, we implement a VEC_CONCAT with
18231 the extraction of the other element. */
18233 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18234 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18236 if (elt == 0)
18237 op0 = val, op1 = tmp;
18238 else
18239 op0 = tmp, op1 = val;
18241 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18242 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18244 return;
18246 case V4SFmode:
18247 switch (elt)
18249 case 0:
18250 use_vec_merge = true;
18251 break;
18253 case 1:
18254 /* tmp = target = A B C D */
18255 tmp = copy_to_reg (target);
18256 /* target = A A B B */
18257 emit_insn (gen_sse_unpcklps (target, target, target));
18258 /* target = X A B B */
18259 ix86_expand_vector_set (false, target, val, 0);
18260 /* target = A X C D */
18261 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18262 GEN_INT (1), GEN_INT (0),
18263 GEN_INT (2+4), GEN_INT (3+4)));
18264 return;
18266 case 2:
18267 /* tmp = target = A B C D */
18268 tmp = copy_to_reg (target);
18269 /* tmp = X B C D */
18270 ix86_expand_vector_set (false, tmp, val, 0);
18271 /* target = A B X D */
18272 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18273 GEN_INT (0), GEN_INT (1),
18274 GEN_INT (0+4), GEN_INT (3+4)));
18275 return;
18277 case 3:
18278 /* tmp = target = A B C D */
18279 tmp = copy_to_reg (target);
18280 /* tmp = X B C D */
18281 ix86_expand_vector_set (false, tmp, val, 0);
18282 /* target = A B C X */
18283 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18284 GEN_INT (0), GEN_INT (1),
18285 GEN_INT (2+4), GEN_INT (0+4)));
18286 return;
18288 default:
18289 gcc_unreachable ();
18291 break;
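/* Editorial note: in the gen_sse_shufps_1 calls above, the four immediate
   selectors index the concatenation of the two source operands: values
   0-3 pick lanes of the first source (filling result lanes 0 and 1) and
   values 4-7 pick lanes of the second (filling result lanes 2 and 3),
   mirroring the hardware shufps semantics -- hence the "+4" on the last
   two selectors.  */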
18293 case V4SImode:
18294 /* Element 0 handled by vec_merge below. */
18295 if (elt == 0)
18297 use_vec_merge = true;
18298 break;
18301 if (TARGET_SSE2)
18303 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18304 store into element 0, then shuffle them back. */
18306 rtx order[4];
18308 order[0] = GEN_INT (elt);
18309 order[1] = const1_rtx;
18310 order[2] = const2_rtx;
18311 order[3] = GEN_INT (3);
18312 order[elt] = const0_rtx;
18314 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18315 order[1], order[2], order[3]));
18317 ix86_expand_vector_set (false, target, val, 0);
18319 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18320 order[1], order[2], order[3]));
18322 else
18324 /* For SSE1, we have to reuse the V4SF code. */
18325 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18326 gen_lowpart (SFmode, val), elt);
18328 return;
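/* Editorial note: the ORDER permutation built above swaps lanes 0 and ELT
   and is its own inverse.  For ELT == 2 it is {2, 1, 0, 3}: the first
   pshufd brings the old lane 2 into lane 0, the element-0 insertion
   replaces it, and the second pshufd swaps the new value back into
   lane 2.  */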
18330 case V8HImode:
18331 use_vec_merge = TARGET_SSE2;
18332 break;
18333 case V4HImode:
18334 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18335 break;
18337 case V16QImode:
18338 case V8QImode:
18339 default:
18340 break;
18343 if (use_vec_merge)
18345 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18346 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18347 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18349 else
18351 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18353 emit_move_insn (mem, target);
18355 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18356 emit_move_insn (tmp, val);
18358 emit_move_insn (target, mem);
18362 void
18363 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18365 enum machine_mode mode = GET_MODE (vec);
18366 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18367 bool use_vec_extr = false;
18368 rtx tmp;
18370 switch (mode)
18372 case V2SImode:
18373 case V2SFmode:
18374 if (!mmx_ok)
18375 break;
18376 /* FALLTHRU */
18378 case V2DFmode:
18379 case V2DImode:
18380 use_vec_extr = true;
18381 break;
18383 case V4SFmode:
18384 switch (elt)
18386 case 0:
18387 tmp = vec;
18388 break;
18390 case 1:
18391 case 3:
18392 tmp = gen_reg_rtx (mode);
18393 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18394 GEN_INT (elt), GEN_INT (elt),
18395 GEN_INT (elt+4), GEN_INT (elt+4)));
18396 break;
18398 case 2:
18399 tmp = gen_reg_rtx (mode);
18400 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18401 break;
18403 default:
18404 gcc_unreachable ();
18406 vec = tmp;
18407 use_vec_extr = true;
18408 elt = 0;
18409 break;
18411 case V4SImode:
18412 if (TARGET_SSE2)
18414 switch (elt)
18416 case 0:
18417 tmp = vec;
18418 break;
18420 case 1:
18421 case 3:
18422 tmp = gen_reg_rtx (mode);
18423 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18424 GEN_INT (elt), GEN_INT (elt),
18425 GEN_INT (elt), GEN_INT (elt)));
18426 break;
18428 case 2:
18429 tmp = gen_reg_rtx (mode);
18430 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18431 break;
18433 default:
18434 gcc_unreachable ();
18436 vec = tmp;
18437 use_vec_extr = true;
18438 elt = 0;
18440 else
18442 /* For SSE1, we have to reuse the V4SF code. */
18443 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18444 gen_lowpart (V4SFmode, vec), elt);
18445 return;
18447 break;
18449 case V8HImode:
18450 use_vec_extr = TARGET_SSE2;
18451 break;
18452 case V4HImode:
18453 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18454 break;
18456 case V16QImode:
18457 case V8QImode:
18458 /* ??? Could extract the appropriate HImode element and shift. */
18459 default:
18460 break;
18463 if (use_vec_extr)
18465 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18466 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18468 /* Let the rtl optimizers know about the zero extension performed. */
18469 if (inner_mode == HImode)
18471 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18472 target = gen_lowpart (SImode, target);
18475 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18477 else
18479 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18481 emit_move_insn (mem, vec);
18483 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18484 emit_move_insn (target, tmp);
18488 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18489 pattern to reduce; DEST is the destination; IN is the input vector. */
18491 void
18492 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18494 rtx tmp1, tmp2, tmp3;
18496 tmp1 = gen_reg_rtx (V4SFmode);
18497 tmp2 = gen_reg_rtx (V4SFmode);
18498 tmp3 = gen_reg_rtx (V4SFmode);
18500 emit_insn (gen_sse_movhlps (tmp1, in, in));
18501 emit_insn (fn (tmp2, tmp1, in));
18503 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18504 GEN_INT (1), GEN_INT (1),
18505 GEN_INT (1+4), GEN_INT (1+4)));
18506 emit_insn (fn (dest, tmp2, tmp3));
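/* Editorial note: for an add reduction of IN = {a, b, c, d}, the sequence
   above computes roughly: movhlps gives tmp1 = {c, d, c, d}; the first FN
   gives tmp2 = {a+c, b+d, ...}; the shufps broadcasts lane 1 so that
   tmp3 = {b+d, ...}; the final FN leaves the full sum a+b+c+d in lane 0
   of DEST.  */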
18509 /* Target hook for scalar_mode_supported_p. */
18510 static bool
18511 ix86_scalar_mode_supported_p (enum machine_mode mode)
18513 if (DECIMAL_FLOAT_MODE_P (mode))
18514 return true;
18515 else
18516 return default_scalar_mode_supported_p (mode);
18519 /* Implements target hook vector_mode_supported_p. */
18520 static bool
18521 ix86_vector_mode_supported_p (enum machine_mode mode)
18523 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18524 return true;
18525 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18526 return true;
18527 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18528 return true;
18529 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18530 return true;
18531 return false;
18534 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18536 We do this in the new i386 backend to maintain source compatibility
18537 with the old cc0-based compiler. */
18539 static tree
18540 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18541 tree inputs ATTRIBUTE_UNUSED,
18542 tree clobbers)
18544 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18545 clobbers);
18546 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18547 clobbers);
18548 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18549 clobbers);
18550 return clobbers;
18553 /* Return true if this goes in large data/bss. */
18555 static bool
18556 ix86_in_large_data_p (tree exp)
18558 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18559 return false;
18561 /* Functions are never large data. */
18562 if (TREE_CODE (exp) == FUNCTION_DECL)
18563 return false;
18565 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18567 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18568 if (strcmp (section, ".ldata") == 0
18569 || strcmp (section, ".lbss") == 0)
18570 return true;
18571 return false;
18573 else
18575 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18577 /* If this is an incomplete type with size 0, then we can't put it
18578 in data because it might be too big when completed. */
18579 if (!size || size > ix86_section_threshold)
18580 return true;
18583 return false;
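/* Editorial note: ix86_section_threshold is assumed here to come from the
   -mlarge-data-threshold= option, so under -mcmodel=medium a static object
   larger than that threshold is treated as large data and placed in
   .ldata/.lbss.  */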
18585 static void
18586 ix86_encode_section_info (tree decl, rtx rtl, int first)
18588 default_encode_section_info (decl, rtl, first);
18590 if (TREE_CODE (decl) == VAR_DECL
18591 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18592 && ix86_in_large_data_p (decl))
18593 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18596 /* Worker function for REVERSE_CONDITION. */
18598 enum rtx_code
18599 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18601 return (mode != CCFPmode && mode != CCFPUmode
18602 ? reverse_condition (code)
18603 : reverse_condition_maybe_unordered (code));
18606 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18607 to OPERANDS[0]. */
18609 const char *
18610 output_387_reg_move (rtx insn, rtx *operands)
18612 if (REG_P (operands[1])
18613 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18615 if (REGNO (operands[0]) == FIRST_STACK_REG
18616 && TARGET_USE_FFREEP)
18617 return "ffreep\t%y0";
18618 return "fstp\t%y0";
18620 if (STACK_TOP_P (operands[0]))
18621 return "fld%z1\t%y1";
18622 return "fst\t%y0";
18625 /* Output code to perform a conditional jump to LABEL if the C2 flag in
18626 the FP status register is set. */
18628 void
18629 ix86_emit_fp_unordered_jump (rtx label)
18631 rtx reg = gen_reg_rtx (HImode);
18632 rtx temp;
18634 emit_insn (gen_x86_fnstsw_1 (reg));
18636 if (TARGET_USE_SAHF)
18638 emit_insn (gen_x86_sahf_1 (reg));
18640 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18641 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18643 else
18645 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18647 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18648 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18651 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18652 gen_rtx_LABEL_REF (VOIDmode, label),
18653 pc_rtx);
18654 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18655 emit_jump_insn (temp);
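/* Editorial note: the 0x04 mask above tests bit 2 of the high byte of the
   fnstsw result, i.e. bit 10 of the FP status word, which is the C2
   condition flag.  On the sahf path, C2 is copied into the parity flag,
   so the UNORDERED test on the flags register amounts to a jp over the
   same condition.  */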
18658 /* Output code to perform a log1p XFmode calculation. */
18660 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18662 rtx label1 = gen_label_rtx ();
18663 rtx label2 = gen_label_rtx ();
18665 rtx tmp = gen_reg_rtx (XFmode);
18666 rtx tmp2 = gen_reg_rtx (XFmode);
18668 emit_insn (gen_absxf2 (tmp, op1));
18669 emit_insn (gen_cmpxf (tmp,
18670 CONST_DOUBLE_FROM_REAL_VALUE (
18671 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18672 XFmode)));
18673 emit_jump_insn (gen_bge (label1));
18675 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18676 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18677 emit_jump (label2);
18679 emit_label (label1);
18680 emit_move_insn (tmp, CONST1_RTX (XFmode));
18681 emit_insn (gen_addxf3 (tmp, op1, tmp));
18682 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18683 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18685 emit_label (label2);
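/* Editorial note: the sequence above evaluates op0 = log(1 + op1) via the
   identity log(x) = ln(2) * log2(x).  When |op1| is below 1 - sqrt(2)/2
   (approximately 0.2928932, the constant compared against above), fyl2xp1
   computes ln(2) * log2(1 + op1) directly for better accuracy near zero;
   otherwise 1 + op1 is formed explicitly and fyl2x is used.  */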
18688 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18690 static void
18691 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18692 tree decl)
18694 /* With Binutils 2.15, the "@unwind" marker must be specified on
18695 every occurrence of the ".eh_frame" section, not just the first
18696 one. */
18697 if (TARGET_64BIT
18698 && strcmp (name, ".eh_frame") == 0)
18700 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18701 flags & SECTION_WRITE ? "aw" : "a");
18702 return;
18704 default_elf_asm_named_section (name, flags, decl);
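/* Editorial note: on 64-bit Solaris the branch above emits, for example,
	.section	.eh_frame,"a",@unwind
   (or "aw" when SECTION_WRITE is set) instead of the default named-section
   directive.  */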
18707 /* Return the mangling of TYPE if it is an extended fundamental type. */
18709 static const char *
18710 ix86_mangle_fundamental_type (tree type)
18712 switch (TYPE_MODE (type))
18714 case TFmode:
18715 /* __float128 is "g". */
18716 return "g";
18717 case XFmode:
18718 /* "long double" or __float80 is "e". */
18719 return "e";
18720 default:
18721 return NULL;
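/* Editorial note: under the Itanium C++ ABI this makes, for example,
   void f(__float128) mangle as _Z1fg and void f(long double) as _Z1fe.  */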
18725 /* For 32-bit code we can save PIC register setup by using
18726 __stack_chk_fail_local hidden function instead of calling
18727 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
18728 register, so it is better to call __stack_chk_fail directly. */
18730 static tree
18731 ix86_stack_protect_fail (void)
18733 return TARGET_64BIT
18734 ? default_external_stack_protect_fail ()
18735 : default_hidden_stack_protect_fail ();
18738 /* Select a format to encode pointers in exception handling data. CODE
18739 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18740 true if the symbol may be affected by dynamic relocations.
18742 ??? All x86 object file formats are capable of representing this.
18743 After all, the relocation needed is the same as for the call insn.
18744 Whether or not a particular assembler allows us to enter such, I
18745 guess we'll have to see. */
18746 int
18747 asm_preferred_eh_data_format (int code, int global)
18749 if (flag_pic)
18751 int type = DW_EH_PE_sdata8;
18752 if (!TARGET_64BIT
18753 || ix86_cmodel == CM_SMALL_PIC
18754 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18755 type = DW_EH_PE_sdata4;
18756 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18758 if (ix86_cmodel == CM_SMALL
18759 || (ix86_cmodel == CM_MEDIUM && code))
18760 return DW_EH_PE_udata4;
18761 return DW_EH_PE_absptr;
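/* Editorial note: for example, 64-bit small-model PIC code gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (with DW_EH_PE_indirect added for
   global symbols), while non-PIC small-model code gets DW_EH_PE_udata4.  */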
18764 #include "gt-i386.h"