1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
55 #endif
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
63 : 4)
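/* Illustrative sketch (not part of the original sources): the per-mode
   cost tables below are indexed with MODE_INDEX; the field names used
   here (mult_init, divide) are assumed from struct processor_costs:

     cost_of_si_mult = ix86_cost->mult_init[MODE_INDEX (SImode)];
     cost_of_di_div  = ix86_cost->divide[MODE_INDEX (DImode)];

   Any mode other than QImode/HImode/SImode/DImode falls into the
   trailing "other" slot (index 4).  */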
65 /* Processor costs (relative to an add) */
66 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
67 #define COSTS_N_BYTES(N) ((N) * 2)
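/* A quick consistency check of the scale, under the assumption stated
   above (COSTS_N_INSNS (N) == (N) * 4, an add is 2 bytes):

     COSTS_N_INSNS (1) == 4      speed cost of one add
     COSTS_N_BYTES (2) == 4      size cost of one 2-byte add

   so the size_cost table below can be consumed by the same RTX cost
   code as the per-processor speed tables.  */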
69 static const
70 struct processor_costs size_cost = { /* costs for tuning for size */
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
88 0, /* "large" insn */
89 2, /* MOVE_RATIO */
90 2, /* cost for loading QImode using movzbl */
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {2, 2, 2}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {2, 2, 2}, /* cost of loading integer registers */
99 3, /* cost of moving MMX register */
100 {3, 3}, /* cost of loading MMX registers
101 in SImode and DImode */
102 {3, 3}, /* cost of storing MMX registers
103 in SImode and DImode */
104 3, /* cost of moving SSE register */
105 {3, 3, 3}, /* cost of loading SSE registers
106 in SImode, DImode and TImode */
107 {3, 3, 3}, /* cost of storing SSE registers
108 in SImode, DImode and TImode */
109 3, /* MMX or SSE register to integer */
110 0, /* size of prefetch block */
111 0, /* number of parallel prefetches */
112 2, /* Branch cost */
113 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
114 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
115 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
116 COSTS_N_BYTES (2), /* cost of FABS instruction. */
117 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
118 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 /* Processor costs (relative to an add) */
122 static const
123 struct processor_costs i386_cost = { /* 386 specific costs */
124 COSTS_N_INSNS (1), /* cost of an add instruction */
125 COSTS_N_INSNS (1), /* cost of a lea instruction */
126 COSTS_N_INSNS (3), /* variable shift costs */
127 COSTS_N_INSNS (2), /* constant shift costs */
128 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
129 COSTS_N_INSNS (6), /* HI */
130 COSTS_N_INSNS (6), /* SI */
131 COSTS_N_INSNS (6), /* DI */
132 COSTS_N_INSNS (6)}, /* other */
133 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
134 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
135 COSTS_N_INSNS (23), /* HI */
136 COSTS_N_INSNS (23), /* SI */
137 COSTS_N_INSNS (23), /* DI */
138 COSTS_N_INSNS (23)}, /* other */
139 COSTS_N_INSNS (3), /* cost of movsx */
140 COSTS_N_INSNS (2), /* cost of movzx */
141 15, /* "large" insn */
142 3, /* MOVE_RATIO */
143 4, /* cost for loading QImode using movzbl */
144 {2, 4, 2}, /* cost of loading integer registers
145 in QImode, HImode and SImode.
146 Relative to reg-reg move (2). */
147 {2, 4, 2}, /* cost of storing integer registers */
148 2, /* cost of reg,reg fld/fst */
149 {8, 8, 8}, /* cost of loading fp registers
150 in SFmode, DFmode and XFmode */
151 {8, 8, 8}, /* cost of loading integer registers */
152 2, /* cost of moving MMX register */
153 {4, 8}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {4, 8}, /* cost of storing MMX registers
156 in SImode and DImode */
157 2, /* cost of moving SSE register */
158 {4, 8, 16}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {4, 8, 16}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of prefetch block */
164 0, /* number of parallel prefetches */
165 1, /* Branch cost */
166 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
167 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
168 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
169 COSTS_N_INSNS (22), /* cost of FABS instruction. */
170 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
171 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
174 static const
175 struct processor_costs i486_cost = { /* 486 specific costs */
176 COSTS_N_INSNS (1), /* cost of an add instruction */
177 COSTS_N_INSNS (1), /* cost of a lea instruction */
178 COSTS_N_INSNS (3), /* variable shift costs */
179 COSTS_N_INSNS (2), /* constant shift costs */
180 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
181 COSTS_N_INSNS (12), /* HI */
182 COSTS_N_INSNS (12), /* SI */
183 COSTS_N_INSNS (12), /* DI */
184 COSTS_N_INSNS (12)}, /* other */
185 1, /* cost of multiply per each bit set */
186 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
187 COSTS_N_INSNS (40), /* HI */
188 COSTS_N_INSNS (40), /* SI */
189 COSTS_N_INSNS (40), /* DI */
190 COSTS_N_INSNS (40)}, /* other */
191 COSTS_N_INSNS (3), /* cost of movsx */
192 COSTS_N_INSNS (2), /* cost of movzx */
193 15, /* "large" insn */
194 3, /* MOVE_RATIO */
195 4, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {8, 8, 8}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {8, 8, 8}, /* cost of loading integer registers */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 1, /* Branch cost */
218 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
219 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
220 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
221 COSTS_N_INSNS (3), /* cost of FABS instruction. */
222 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
223 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
226 static const
227 struct processor_costs pentium_cost = {
228 COSTS_N_INSNS (1), /* cost of an add instruction */
229 COSTS_N_INSNS (1), /* cost of a lea instruction */
230 COSTS_N_INSNS (4), /* variable shift costs */
231 COSTS_N_INSNS (1), /* constant shift costs */
232 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (11), /* HI */
234 COSTS_N_INSNS (11), /* SI */
235 COSTS_N_INSNS (11), /* DI */
236 COSTS_N_INSNS (11)}, /* other */
237 0, /* cost of multiply per each bit set */
238 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
239 COSTS_N_INSNS (25), /* HI */
240 COSTS_N_INSNS (25), /* SI */
241 COSTS_N_INSNS (25), /* DI */
242 COSTS_N_INSNS (25)}, /* other */
243 COSTS_N_INSNS (3), /* cost of movsx */
244 COSTS_N_INSNS (2), /* cost of movzx */
245 8, /* "large" insn */
246 6, /* MOVE_RATIO */
247 6, /* cost for loading QImode using movzbl */
248 {2, 4, 2}, /* cost of loading integer registers
249 in QImode, HImode and SImode.
250 Relative to reg-reg move (2). */
251 {2, 4, 2}, /* cost of storing integer registers */
252 2, /* cost of reg,reg fld/fst */
253 {2, 2, 6}, /* cost of loading fp registers
254 in SFmode, DFmode and XFmode */
255 {4, 4, 6}, /* cost of loading integer registers */
256 8, /* cost of moving MMX register */
257 {8, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {8, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of prefetch block */
268 0, /* number of parallel prefetches */
269 2, /* Branch cost */
270 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
271 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
272 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
273 COSTS_N_INSNS (1), /* cost of FABS instruction. */
274 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
275 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
278 static const
279 struct processor_costs pentiumpro_cost = {
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (1), /* variable shift costs */
283 COSTS_N_INSNS (1), /* constant shift costs */
284 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (4), /* HI */
286 COSTS_N_INSNS (4), /* SI */
287 COSTS_N_INSNS (4), /* DI */
288 COSTS_N_INSNS (4)}, /* other */
289 0, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (17), /* HI */
292 COSTS_N_INSNS (17), /* SI */
293 COSTS_N_INSNS (17), /* DI */
294 COSTS_N_INSNS (17)}, /* other */
295 COSTS_N_INSNS (1), /* cost of movsx */
296 COSTS_N_INSNS (1), /* cost of movzx */
297 8, /* "large" insn */
298 6, /* MOVE_RATIO */
299 2, /* cost for loading QImode using movzbl */
300 {4, 4, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 2, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {2, 2, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 6}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 3, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 6, /* number of parallel prefetches */
321 2, /* Branch cost */
322 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
323 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
324 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
327 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs k6_cost = {
332 COSTS_N_INSNS (1), /* cost of an add instruction */
333 COSTS_N_INSNS (2), /* cost of a lea instruction */
334 COSTS_N_INSNS (1), /* variable shift costs */
335 COSTS_N_INSNS (1), /* constant shift costs */
336 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
337 COSTS_N_INSNS (3), /* HI */
338 COSTS_N_INSNS (3), /* SI */
339 COSTS_N_INSNS (3), /* DI */
340 COSTS_N_INSNS (3)}, /* other */
341 0, /* cost of multiply per each bit set */
342 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
343 COSTS_N_INSNS (18), /* HI */
344 COSTS_N_INSNS (18), /* SI */
345 COSTS_N_INSNS (18), /* DI */
346 COSTS_N_INSNS (18)}, /* other */
347 COSTS_N_INSNS (2), /* cost of movsx */
348 COSTS_N_INSNS (2), /* cost of movzx */
349 8, /* "large" insn */
350 4, /* MOVE_RATIO */
351 3, /* cost for loading QImode using movzbl */
352 {4, 5, 4}, /* cost of loading integer registers
353 in QImode, HImode and SImode.
354 Relative to reg-reg move (2). */
355 {2, 3, 2}, /* cost of storing integer registers */
356 4, /* cost of reg,reg fld/fst */
357 {6, 6, 6}, /* cost of loading fp registers
358 in SFmode, DFmode and XFmode */
359 {4, 4, 4}, /* cost of loading integer registers */
360 2, /* cost of moving MMX register */
361 {2, 2}, /* cost of loading MMX registers
362 in SImode and DImode */
363 {2, 2}, /* cost of storing MMX registers
364 in SImode and DImode */
365 2, /* cost of moving SSE register */
366 {2, 2, 8}, /* cost of loading SSE registers
367 in SImode, DImode and TImode */
368 {2, 2, 8}, /* cost of storing SSE registers
369 in SImode, DImode and TImode */
370 6, /* MMX or SSE register to integer */
371 32, /* size of prefetch block */
372 1, /* number of parallel prefetches */
373 1, /* Branch cost */
374 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
375 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
376 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
377 COSTS_N_INSNS (2), /* cost of FABS instruction. */
378 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
379 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
382 static const
383 struct processor_costs athlon_cost = {
384 COSTS_N_INSNS (1), /* cost of an add instruction */
385 COSTS_N_INSNS (2), /* cost of a lea instruction */
386 COSTS_N_INSNS (1), /* variable shift costs */
387 COSTS_N_INSNS (1), /* constant shift costs */
388 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
389 COSTS_N_INSNS (5), /* HI */
390 COSTS_N_INSNS (5), /* SI */
391 COSTS_N_INSNS (5), /* DI */
392 COSTS_N_INSNS (5)}, /* other */
393 0, /* cost of multiply per each bit set */
394 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
395 COSTS_N_INSNS (26), /* HI */
396 COSTS_N_INSNS (42), /* SI */
397 COSTS_N_INSNS (74), /* DI */
398 COSTS_N_INSNS (74)}, /* other */
399 COSTS_N_INSNS (1), /* cost of movsx */
400 COSTS_N_INSNS (1), /* cost of movzx */
401 8, /* "large" insn */
402 9, /* MOVE_RATIO */
403 4, /* cost for loading QImode using movzbl */
404 {3, 4, 3}, /* cost of loading integer registers
405 in QImode, HImode and SImode.
406 Relative to reg-reg move (2). */
407 {3, 4, 3}, /* cost of storing integer registers */
408 4, /* cost of reg,reg fld/fst */
409 {4, 4, 12}, /* cost of loading fp registers
410 in SFmode, DFmode and XFmode */
411 {6, 6, 8}, /* cost of loading integer registers */
412 2, /* cost of moving MMX register */
413 {4, 4}, /* cost of loading MMX registers
414 in SImode and DImode */
415 {4, 4}, /* cost of storing MMX registers
416 in SImode and DImode */
417 2, /* cost of moving SSE register */
418 {4, 4, 6}, /* cost of loading SSE registers
419 in SImode, DImode and TImode */
420 {4, 4, 5}, /* cost of storing SSE registers
421 in SImode, DImode and TImode */
422 5, /* MMX or SSE register to integer */
423 64, /* size of prefetch block */
424 6, /* number of parallel prefetches */
425 5, /* Branch cost */
426 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (2), /* cost of FABS instruction. */
430 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
434 static const
435 struct processor_costs k8_cost = {
436 COSTS_N_INSNS (1), /* cost of an add instruction */
437 COSTS_N_INSNS (2), /* cost of a lea instruction */
438 COSTS_N_INSNS (1), /* variable shift costs */
439 COSTS_N_INSNS (1), /* constant shift costs */
440 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
441 COSTS_N_INSNS (4), /* HI */
442 COSTS_N_INSNS (3), /* SI */
443 COSTS_N_INSNS (4), /* DI */
444 COSTS_N_INSNS (5)}, /* other */
445 0, /* cost of multiply per each bit set */
446 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
447 COSTS_N_INSNS (26), /* HI */
448 COSTS_N_INSNS (42), /* SI */
449 COSTS_N_INSNS (74), /* DI */
450 COSTS_N_INSNS (74)}, /* other */
451 COSTS_N_INSNS (1), /* cost of movsx */
452 COSTS_N_INSNS (1), /* cost of movzx */
453 8, /* "large" insn */
454 9, /* MOVE_RATIO */
455 4, /* cost for loading QImode using movzbl */
456 {3, 4, 3}, /* cost of loading integer registers
457 in QImode, HImode and SImode.
458 Relative to reg-reg move (2). */
459 {3, 4, 3}, /* cost of storing integer registers */
460 4, /* cost of reg,reg fld/fst */
461 {4, 4, 12}, /* cost of loading fp registers
462 in SFmode, DFmode and XFmode */
463 {6, 6, 8}, /* cost of loading integer registers */
464 2, /* cost of moving MMX register */
465 {3, 3}, /* cost of loading MMX registers
466 in SImode and DImode */
467 {4, 4}, /* cost of storing MMX registers
468 in SImode and DImode */
469 2, /* cost of moving SSE register */
470 {4, 3, 6}, /* cost of loading SSE registers
471 in SImode, DImode and TImode */
472 {4, 4, 5}, /* cost of storing SSE registers
473 in SImode, DImode and TImode */
474 5, /* MMX or SSE register to integer */
475 64, /* size of prefetch block */
476 6, /* number of parallel prefetches */
477 5, /* Branch cost */
478 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
479 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
480 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
481 COSTS_N_INSNS (2), /* cost of FABS instruction. */
482 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
483 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
486 static const
487 struct processor_costs pentium4_cost = {
488 COSTS_N_INSNS (1), /* cost of an add instruction */
489 COSTS_N_INSNS (3), /* cost of a lea instruction */
490 COSTS_N_INSNS (4), /* variable shift costs */
491 COSTS_N_INSNS (4), /* constant shift costs */
492 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
493 COSTS_N_INSNS (15), /* HI */
494 COSTS_N_INSNS (15), /* SI */
495 COSTS_N_INSNS (15), /* DI */
496 COSTS_N_INSNS (15)}, /* other */
497 0, /* cost of multiply per each bit set */
498 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
499 COSTS_N_INSNS (56), /* HI */
500 COSTS_N_INSNS (56), /* SI */
501 COSTS_N_INSNS (56), /* DI */
502 COSTS_N_INSNS (56)}, /* other */
503 COSTS_N_INSNS (1), /* cost of movsx */
504 COSTS_N_INSNS (1), /* cost of movzx */
505 16, /* "large" insn */
506 6, /* MOVE_RATIO */
507 2, /* cost for loading QImode using movzbl */
508 {4, 5, 4}, /* cost of loading integer registers
509 in QImode, HImode and SImode.
510 Relative to reg-reg move (2). */
511 {2, 3, 2}, /* cost of storing integer registers */
512 2, /* cost of reg,reg fld/fst */
513 {2, 2, 6}, /* cost of loading fp registers
514 in SFmode, DFmode and XFmode */
515 {4, 4, 6}, /* cost of loading integer registers */
516 2, /* cost of moving MMX register */
517 {2, 2}, /* cost of loading MMX registers
518 in SImode and DImode */
519 {2, 2}, /* cost of storing MMX registers
520 in SImode and DImode */
521 12, /* cost of moving SSE register */
522 {12, 12, 12}, /* cost of loading SSE registers
523 in SImode, DImode and TImode */
524 {2, 2, 8}, /* cost of storing SSE registers
525 in SImode, DImode and TImode */
526 10, /* MMX or SSE register to integer */
527 64, /* size of prefetch block */
528 6, /* number of parallel prefetches */
529 2, /* Branch cost */
530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
538 static const
539 struct processor_costs nocona_cost = {
540 COSTS_N_INSNS (1), /* cost of an add instruction */
541 COSTS_N_INSNS (1), /* cost of a lea instruction */
542 COSTS_N_INSNS (1), /* variable shift costs */
543 COSTS_N_INSNS (1), /* constant shift costs */
544 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
545 COSTS_N_INSNS (10), /* HI */
546 COSTS_N_INSNS (10), /* SI */
547 COSTS_N_INSNS (10), /* DI */
548 COSTS_N_INSNS (10)}, /* other */
549 0, /* cost of multiply per each bit set */
550 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
551 COSTS_N_INSNS (66), /* HI */
552 COSTS_N_INSNS (66), /* SI */
553 COSTS_N_INSNS (66), /* DI */
554 COSTS_N_INSNS (66)}, /* other */
555 COSTS_N_INSNS (1), /* cost of movsx */
556 COSTS_N_INSNS (1), /* cost of movzx */
557 16, /* "large" insn */
558 17, /* MOVE_RATIO */
559 4, /* cost for loading QImode using movzbl */
560 {4, 4, 4}, /* cost of loading integer registers
561 in QImode, HImode and SImode.
562 Relative to reg-reg move (2). */
563 {4, 4, 4}, /* cost of storing integer registers */
564 3, /* cost of reg,reg fld/fst */
565 {12, 12, 12}, /* cost of loading fp registers
566 in SFmode, DFmode and XFmode */
567 {4, 4, 4}, /* cost of loading integer registers */
568 6, /* cost of moving MMX register */
569 {12, 12}, /* cost of loading MMX registers
570 in SImode and DImode */
571 {12, 12}, /* cost of storing MMX registers
572 in SImode and DImode */
573 6, /* cost of moving SSE register */
574 {12, 12, 12}, /* cost of loading SSE registers
575 in SImode, DImode and TImode */
576 {12, 12, 12}, /* cost of storing SSE registers
577 in SImode, DImode and TImode */
578 8, /* MMX or SSE register to integer */
579 128, /* size of prefetch block */
580 8, /* number of parallel prefetches */
581 1, /* Branch cost */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
590 /* Generic64 should produce code tuned for Nocona and K8. */
591 static const
592 struct processor_costs generic64_cost = {
593 COSTS_N_INSNS (1), /* cost of an add instruction */
594 /* On all chips taken into consideration, lea takes 2 cycles or more. With
595 that cost, however, our current implementation of synth_mult ends up
596 using unnecessary temporary registers, causing regressions on several
597 SPECfp benchmarks. */
598 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (4), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (4), /* DI */
605 COSTS_N_INSNS (2)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (26), /* HI */
609 COSTS_N_INSNS (42), /* SI */
610 COSTS_N_INSNS (74), /* DI */
611 COSTS_N_INSNS (74)}, /* other */
612 COSTS_N_INSNS (1), /* cost of movsx */
613 COSTS_N_INSNS (1), /* cost of movzx */
614 8, /* "large" insn */
615 17, /* MOVE_RATIO */
616 4, /* cost for loading QImode using movzbl */
617 {4, 4, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {4, 4, 4}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {12, 12, 12}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {6, 6, 8}, /* cost of loading integer registers */
625 2, /* cost of moving MMX register */
626 {8, 8}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {8, 8}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {8, 8, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {8, 8, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
638 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
639 is increased to the perhaps more appropriate value of 5. */
640 3, /* Branch cost */
641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
649 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
650 static const
651 struct processor_costs generic32_cost = {
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
654 COSTS_N_INSNS (1), /* variable shift costs */
655 COSTS_N_INSNS (1), /* constant shift costs */
656 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (4), /* HI */
658 COSTS_N_INSNS (3), /* SI */
659 COSTS_N_INSNS (4), /* DI */
660 COSTS_N_INSNS (2)}, /* other */
661 0, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (26), /* HI */
664 COSTS_N_INSNS (42), /* SI */
665 COSTS_N_INSNS (74), /* DI */
666 COSTS_N_INSNS (74)}, /* other */
667 COSTS_N_INSNS (1), /* cost of movsx */
668 COSTS_N_INSNS (1), /* cost of movzx */
669 8, /* "large" insn */
670 17, /* MOVE_RATIO */
671 4, /* cost for loading QImode using movzbl */
672 {4, 4, 4}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {4, 4, 4}, /* cost of storing integer registers */
676 4, /* cost of reg,reg fld/fst */
677 {12, 12, 12}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {6, 6, 8}, /* cost of loading integer registers */
680 2, /* cost of moving MMX register */
681 {8, 8}, /* cost of loading MMX registers
682 in SImode and DImode */
683 {8, 8}, /* cost of storing MMX registers
684 in SImode and DImode */
685 2, /* cost of moving SSE register */
686 {8, 8, 8}, /* cost of loading SSE registers
687 in SImode, DImode and TImode */
688 {8, 8, 8}, /* cost of storing SSE registers
689 in SImode, DImode and TImode */
690 5, /* MMX or SSE register to integer */
691 64, /* size of prefetch block */
692 6, /* number of parallel prefetches */
693 3, /* Branch cost */
694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
702 const struct processor_costs *ix86_cost = &pentium_cost;
704 /* Processor feature/optimization bitmasks. */
705 #define m_386 (1<<PROCESSOR_I386)
706 #define m_486 (1<<PROCESSOR_I486)
707 #define m_PENT (1<<PROCESSOR_PENTIUM)
708 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
709 #define m_K6 (1<<PROCESSOR_K6)
710 #define m_ATHLON (1<<PROCESSOR_ATHLON)
711 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
712 #define m_K8 (1<<PROCESSOR_K8)
713 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
714 #define m_NOCONA (1<<PROCESSOR_NOCONA)
715 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
716 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
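/* Illustrative sketch of how these masks are consumed (the TUNEMASK and
   TARGET_* macro names are assumed from i386.h, not defined here):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   i.e. a feature flagged as, say, m_K6 | m_GENERIC64 is active only when
   ix86_tune is PROCESSOR_K6 or PROCESSOR_GENERIC64.  */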
719 /* Generic instruction choice should be a common subset of the supported CPUs
720 (PPro/PENT4/NOCONA/Athlon/K8). */
722 /* Using leave does not affect Nocona SPEC2000 results negatively, so enabling it for
723 Generic64 seems like a good code-size tradeoff. We can't enable it for 32bit
724 generic because it does not work well with PPro based chips. */
725 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
726 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
727 const int x86_zero_extend_with_and = m_486 | m_PENT;
728 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
729 const int x86_double_with_add = ~m_386;
730 const int x86_use_bit_test = m_386;
731 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
732 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
733 const int x86_fisttp = m_NOCONA;
734 const int x86_3dnow_a = m_ATHLON_K8;
735 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
736 /* Branch hints were put into the P4 based on simulation results. But
737 after the P4 was made, no performance benefit was observed with
738 branch hints; they also increase code size. As a result,
739 icc never generates branch hints. */
740 const int x86_branch_hints = 0;
741 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
742 /* We probably ought to watch for partial register stalls in the Generic32
743 compilation setting as well. However, in the current implementation the
744 partial register stalls are not eliminated very well - they can
745 be introduced via subregs synthesized by combine and can happen
746 in caller/callee saving sequences.
747 Because this option pays back little on PPro based chips and is in conflict
748 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
749 to leave it off for generic32 for now. */
750 const int x86_partial_reg_stall = m_PPRO;
751 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
752 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
753 const int x86_use_mov0 = m_K6;
754 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
755 const int x86_read_modify_write = ~m_PENT;
756 const int x86_read_modify = ~(m_PENT | m_PPRO);
757 const int x86_split_long_moves = m_PPRO;
758 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
759 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
760 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
761 const int x86_qimode_math = ~(0);
762 const int x86_promote_qi_regs = 0;
763 /* On PPro this flag is meant to avoid partial register stalls. Just like
764 x86_partial_reg_stall, this option might be considered for Generic32
765 if our scheme for avoiding partial stalls were more effective. */
766 const int x86_himode_math = ~(m_PPRO);
767 const int x86_promote_hi_regs = m_PPRO;
768 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
769 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
770 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
771 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
772 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
773 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
774 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
775 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
776 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
777 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
778 const int x86_shift1 = ~m_486;
779 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
780 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
781 that treat 128bit SSE registers as single units and K8 based chips that
782 divide SSE registers into two 64bit halves.
783 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
784 to allow register renaming on 128bit SSE units, but usually results in one
785 extra microop on 64bit SSE units. Experimental results show that disabling
786 this option on P4 brings over a 20% SPECfp regression, while enabling it on
787 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
788 of moves. */
789 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 /* Set for machines where the type and dependencies are resolved on SSE
791 register parts instead of whole registers, so we may maintain just the
792 lower part of scalar values in the proper format, leaving the upper part
793 undefined. */
794 const int x86_sse_split_regs = m_ATHLON_K8;
795 const int x86_sse_typeless_stores = m_ATHLON_K8;
796 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
797 const int x86_use_ffreep = m_ATHLON_K8;
798 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
799 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
801 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
802 integer data in xmm registers, which results in pretty abysmal code. */
803 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
805 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
806 /* Some CPU cores are not able to predict more than 4 branch instructions in
807 the 16 byte window. */
808 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
809 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
810 const int x86_use_bt = m_ATHLON_K8;
811 /* Compare and exchange was added for 80486. */
812 const int x86_cmpxchg = ~m_386;
813 /* Compare and exchange 8 bytes was added for pentium. */
814 const int x86_cmpxchg8b = ~(m_386 | m_486);
815 /* Compare and exchange 16 bytes was added for nocona. */
816 const int x86_cmpxchg16b = m_NOCONA;
817 /* Exchange and add was added for 80486. */
818 const int x86_xadd = ~m_386;
819 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
821 /* If the average insn count for a single function invocation is
822 lower than this constant, emit fast (but longer) prologue and
823 epilogue code. */
824 #define FAST_PROLOGUE_INSN_COUNT 20
826 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
827 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
828 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
829 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
831 /* Array of the smallest class containing reg number REGNO, indexed by
832 REGNO. Used by REGNO_REG_CLASS in i386.h. */
834 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
836 /* ax, dx, cx, bx */
837 AREG, DREG, CREG, BREG,
838 /* si, di, bp, sp */
839 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
840 /* FP registers */
841 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
842 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
843 /* arg pointer */
844 NON_Q_REGS,
845 /* flags, fpsr, dirflag, frame */
846 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
847 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
848 SSE_REGS, SSE_REGS,
849 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
850 MMX_REGS, MMX_REGS,
851 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
852 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
853 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
854 SSE_REGS, SSE_REGS,
857 /* The "default" register map used in 32bit mode. */
859 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
861 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
862 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
863 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
864 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
865 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
866 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
867 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
870 static int const x86_64_int_parameter_registers[6] =
872 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
873 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
876 static int const x86_64_int_return_registers[4] =
878 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
881 /* The "default" register map used in 64bit mode. */
882 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
884 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
885 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
886 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
887 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
888 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
889 8,9,10,11,12,13,14,15, /* extended integer registers */
890 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
893 /* Define the register numbers to be used in Dwarf debugging information.
894 The SVR4 reference port C compiler uses the following register numbers
895 in its Dwarf output code:
896 0 for %eax (gcc regno = 0)
897 1 for %ecx (gcc regno = 2)
898 2 for %edx (gcc regno = 1)
899 3 for %ebx (gcc regno = 3)
900 4 for %esp (gcc regno = 7)
901 5 for %ebp (gcc regno = 6)
902 6 for %esi (gcc regno = 4)
903 7 for %edi (gcc regno = 5)
904 The following three DWARF register numbers are never generated by
905 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
906 believes these numbers have these meanings.
907 8 for %eip (no gcc equivalent)
908 9 for %eflags (gcc regno = 17)
909 10 for %trapno (no gcc equivalent)
910 It is not at all clear how we should number the FP stack registers
911 for the x86 architecture. If the version of SDB on x86/svr4 were
912 a bit less brain dead with respect to floating-point then we would
913 have a precedent to follow with respect to DWARF register numbers
914 for x86 FP registers, but the SDB on x86/svr4 is so completely
915 broken with respect to FP registers that it is hardly worth thinking
916 of it as something to strive for compatibility with.
917 The version of x86/svr4 SDB I have at the moment does (partially)
918 seem to believe that DWARF register number 11 is associated with
919 the x86 register %st(0), but that's about all. Higher DWARF
920 register numbers don't seem to be associated with anything in
921 particular, and even for DWARF regno 11, SDB only seems to under-
922 stand that it should say that a variable lives in %st(0) (when
923 asked via an `=' command) if we said it was in DWARF regno 11,
924 but SDB still prints garbage when asked for the value of the
925 variable in question (via a `/' command).
926 (Also note that the labels SDB prints for various FP stack regs
927 when doing an `x' command are all wrong.)
928 Note that these problems generally don't affect the native SVR4
929 C compiler because it doesn't allow the use of -O with -g and
930 because when it is *not* optimizing, it allocates a memory
931 location for each floating-point variable, and the memory
932 location is what gets described in the DWARF AT_location
933 attribute for the variable in question.
934 Regardless of the severe mental illness of the x86/svr4 SDB, we
935 do something sensible here and we use the following DWARF
936 register numbers. Note that these are all stack-top-relative
937 numbers.
938 11 for %st(0) (gcc regno = 8)
939 12 for %st(1) (gcc regno = 9)
940 13 for %st(2) (gcc regno = 10)
941 14 for %st(3) (gcc regno = 11)
942 15 for %st(4) (gcc regno = 12)
943 16 for %st(5) (gcc regno = 13)
944 17 for %st(6) (gcc regno = 14)
945 18 for %st(7) (gcc regno = 15)
947 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
949 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
950 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
951 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
952 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
953 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
954 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
955 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
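/* For example, reading the table above: %ebp is gcc regno 6 and
   svr4_dbx_register_map[6] == 5, while %st(0) is gcc regno 8 and maps to
   DWARF regno 11, matching the numbering described in the comment.  */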
958 /* Test and compare insns in i386.md store the information needed to
959 generate branch and scc insns here. */
961 rtx ix86_compare_op0 = NULL_RTX;
962 rtx ix86_compare_op1 = NULL_RTX;
963 rtx ix86_compare_emitted = NULL_RTX;
965 /* Size of the register save area. */
966 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
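/* Illustrative arithmetic, assuming the 64-bit ABI values
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8:

     X86_64_VARARGS_SIZE == 6 * 8 + 8 * 16 == 176 bytes,

   the usual size of the va_list register save area.  */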
968 /* Define the structure for the machine field in struct function. */
970 struct stack_local_entry GTY(())
972 unsigned short mode;
973 unsigned short n;
974 rtx rtl;
975 struct stack_local_entry *next;
978 /* Structure describing stack frame layout.
979 Stack grows downward:
981 [arguments]
982 <- ARG_POINTER
983 saved pc
985 saved frame pointer if frame_pointer_needed
986 <- HARD_FRAME_POINTER
987 [saved regs]
989 [padding1] \
991 [va_arg registers] (
992 > to_allocate <- FRAME_POINTER
993 [frame] (
995 [padding2] /
997 struct ix86_frame
999 int nregs;
1000 int padding1;
1001 int va_arg_size;
1002 HOST_WIDE_INT frame;
1003 int padding2;
1004 int outgoing_arguments_size;
1005 int red_zone_size;
1007 HOST_WIDE_INT to_allocate;
1008 /* The offsets relative to ARG_POINTER. */
1009 HOST_WIDE_INT frame_pointer_offset;
1010 HOST_WIDE_INT hard_frame_pointer_offset;
1011 HOST_WIDE_INT stack_pointer_offset;
1013 /* When save_regs_using_mov is set, emit prologue using
1014 move instead of push instructions. */
1015 bool save_regs_using_mov;
1018 /* Code model option. */
1019 enum cmodel ix86_cmodel;
1020 /* Asm dialect. */
1021 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1022 /* TLS dialects. */
1023 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1025 /* Which unit we are generating floating point math for. */
1026 enum fpmath_unit ix86_fpmath;
1028 /* Which cpu are we scheduling for. */
1029 enum processor_type ix86_tune;
1030 /* Which instruction set architecture to use. */
1031 enum processor_type ix86_arch;
1033 /* true if sse prefetch instruction is not NOOP. */
1034 int x86_prefetch_sse;
1036 /* ix86_regparm_string as a number */
1037 static int ix86_regparm;
1039 /* Preferred alignment for stack boundary in bits. */
1040 unsigned int ix86_preferred_stack_boundary;
1042 /* Values 1-5: see jump.c */
1043 int ix86_branch_cost;
1045 /* Variables which are this size or smaller are put in the data/bss
1046 or ldata/lbss sections. */
1048 int ix86_section_threshold = 65536;
1050 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1051 char internal_label_prefix[16];
1052 int internal_label_prefix_len;
1054 static bool ix86_handle_option (size_t, const char *, int);
1055 static void output_pic_addr_const (FILE *, rtx, int);
1056 static void put_condition_code (enum rtx_code, enum machine_mode,
1057 int, int, FILE *);
1058 static const char *get_some_local_dynamic_name (void);
1059 static int get_some_local_dynamic_name_1 (rtx *, void *);
1060 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1061 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1062 rtx *);
1063 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1064 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1065 enum machine_mode);
1066 static rtx get_thread_pointer (int);
1067 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1068 static void get_pc_thunk_name (char [32], unsigned int);
1069 static rtx gen_push (rtx);
1070 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1071 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1072 static struct machine_function * ix86_init_machine_status (void);
1073 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1074 static int ix86_nsaved_regs (void);
1075 static void ix86_emit_save_regs (void);
1076 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1077 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1078 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1079 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1080 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1081 static rtx ix86_expand_aligntest (rtx, int);
1082 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1083 static int ix86_issue_rate (void);
1084 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1085 static int ia32_multipass_dfa_lookahead (void);
1086 static void ix86_init_mmx_sse_builtins (void);
1087 static rtx x86_this_parameter (tree);
1088 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1089 HOST_WIDE_INT, tree);
1090 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1091 static void x86_file_start (void);
1092 static void ix86_reorg (void);
1093 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1094 static tree ix86_build_builtin_va_list (void);
1095 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1096 tree, int *, int);
1097 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1098 static bool ix86_vector_mode_supported_p (enum machine_mode);
1100 static int ix86_address_cost (rtx);
1101 static bool ix86_cannot_force_const_mem (rtx);
1102 static rtx ix86_delegitimize_address (rtx);
1104 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1106 struct builtin_description;
1107 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1108 tree, rtx);
1109 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1110 tree, rtx);
1111 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1112 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1113 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1114 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1115 static rtx safe_vector_operand (rtx, enum machine_mode);
1116 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1117 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1118 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1119 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1120 static int ix86_fp_comparison_cost (enum rtx_code code);
1121 static unsigned int ix86_select_alt_pic_regnum (void);
1122 static int ix86_save_reg (unsigned int, int);
1123 static void ix86_compute_frame_layout (struct ix86_frame *);
1124 static int ix86_comp_type_attributes (tree, tree);
1125 static int ix86_function_regparm (tree, tree);
1126 const struct attribute_spec ix86_attribute_table[];
1127 static bool ix86_function_ok_for_sibcall (tree, tree);
1128 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1129 static int ix86_value_regno (enum machine_mode, tree, tree);
1130 static bool contains_128bit_aligned_vector_p (tree);
1131 static rtx ix86_struct_value_rtx (tree, int);
1132 static bool ix86_ms_bitfield_layout_p (tree);
1133 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1134 static int extended_reg_mentioned_1 (rtx *, void *);
1135 static bool ix86_rtx_costs (rtx, int, int, int *);
1136 static int min_insn_size (rtx);
1137 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1138 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1139 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1140 tree, bool);
1141 static void ix86_init_builtins (void);
1142 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1143 static const char *ix86_mangle_fundamental_type (tree);
1144 static tree ix86_stack_protect_fail (void);
1145 static rtx ix86_internal_arg_pointer (void);
1146 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1148 /* This function is only used on Solaris. */
1149 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1150 ATTRIBUTE_UNUSED;
1152 /* Register class used for passing a given 64bit part of the argument.
1153 These represent classes as documented by the psABI, with the exception
1154 of the SSESF and SSEDF classes, which are basically the SSE class, except that gcc
1155 will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1157 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1158 whenever possible (the upper half then contains only padding).
1160 enum x86_64_reg_class
1162 X86_64_NO_CLASS,
1163 X86_64_INTEGER_CLASS,
1164 X86_64_INTEGERSI_CLASS,
1165 X86_64_SSE_CLASS,
1166 X86_64_SSESF_CLASS,
1167 X86_64_SSEDF_CLASS,
1168 X86_64_SSEUP_CLASS,
1169 X86_64_X87_CLASS,
1170 X86_64_X87UP_CLASS,
1171 X86_64_COMPLEX_X87_CLASS,
1172 X86_64_MEMORY_CLASS
1174 static const char * const x86_64_reg_class_name[] = {
1175 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1176 "sseup", "x87", "x87up", "cplx87", "no"
1179 #define MAX_CLASSES 4
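/* Illustrative example of the classification sketched above (a sketch,
   not taken from the sources): a by-value argument such as

     struct s { double d; int i; };

   is split into two eightbytes; the first classifies as
   X86_64_SSEDF_CLASS (passed in an SSE register via a DFmode move) and
   the second as X86_64_INTEGERSI_CLASS (an SImode move of a general
   register), which is exactly why the cheaper SF/DF/SI sub-classes
   exist.  */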
1181 /* Table of constants used by fldpi, fldln2, etc.... */
1182 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1183 static bool ext_80387_constants_init = 0;
1184 static void init_ext_80387_constants (void);
1185 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1186 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1187 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1188 static section *x86_64_elf_select_section (tree decl, int reloc,
1189 unsigned HOST_WIDE_INT align)
1190 ATTRIBUTE_UNUSED;
1192 /* Initialize the GCC target structure. */
1193 #undef TARGET_ATTRIBUTE_TABLE
1194 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1195 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1196 # undef TARGET_MERGE_DECL_ATTRIBUTES
1197 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1198 #endif
1200 #undef TARGET_COMP_TYPE_ATTRIBUTES
1201 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1203 #undef TARGET_INIT_BUILTINS
1204 #define TARGET_INIT_BUILTINS ix86_init_builtins
1205 #undef TARGET_EXPAND_BUILTIN
1206 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1208 #undef TARGET_ASM_FUNCTION_EPILOGUE
1209 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1211 #undef TARGET_ENCODE_SECTION_INFO
1212 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1213 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1214 #else
1215 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1216 #endif
1218 #undef TARGET_ASM_OPEN_PAREN
1219 #define TARGET_ASM_OPEN_PAREN ""
1220 #undef TARGET_ASM_CLOSE_PAREN
1221 #define TARGET_ASM_CLOSE_PAREN ""
1223 #undef TARGET_ASM_ALIGNED_HI_OP
1224 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1225 #undef TARGET_ASM_ALIGNED_SI_OP
1226 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1227 #ifdef ASM_QUAD
1228 #undef TARGET_ASM_ALIGNED_DI_OP
1229 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1230 #endif
1232 #undef TARGET_ASM_UNALIGNED_HI_OP
1233 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1234 #undef TARGET_ASM_UNALIGNED_SI_OP
1235 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1236 #undef TARGET_ASM_UNALIGNED_DI_OP
1237 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1239 #undef TARGET_SCHED_ADJUST_COST
1240 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1241 #undef TARGET_SCHED_ISSUE_RATE
1242 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1243 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1244 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1245 ia32_multipass_dfa_lookahead
1247 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1248 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1250 #ifdef HAVE_AS_TLS
1251 #undef TARGET_HAVE_TLS
1252 #define TARGET_HAVE_TLS true
1253 #endif
1254 #undef TARGET_CANNOT_FORCE_CONST_MEM
1255 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1257 #undef TARGET_DELEGITIMIZE_ADDRESS
1258 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1260 #undef TARGET_MS_BITFIELD_LAYOUT_P
1261 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1263 #if TARGET_MACHO
1264 #undef TARGET_BINDS_LOCAL_P
1265 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1266 #endif
1268 #undef TARGET_ASM_OUTPUT_MI_THUNK
1269 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1270 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1271 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1273 #undef TARGET_ASM_FILE_START
1274 #define TARGET_ASM_FILE_START x86_file_start
1276 #undef TARGET_DEFAULT_TARGET_FLAGS
1277 #define TARGET_DEFAULT_TARGET_FLAGS \
1278 (TARGET_DEFAULT \
1279 | TARGET_64BIT_DEFAULT \
1280 | TARGET_SUBTARGET_DEFAULT \
1281 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1283 #undef TARGET_HANDLE_OPTION
1284 #define TARGET_HANDLE_OPTION ix86_handle_option
1286 #undef TARGET_RTX_COSTS
1287 #define TARGET_RTX_COSTS ix86_rtx_costs
1288 #undef TARGET_ADDRESS_COST
1289 #define TARGET_ADDRESS_COST ix86_address_cost
1291 #undef TARGET_FIXED_CONDITION_CODE_REGS
1292 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1293 #undef TARGET_CC_MODES_COMPATIBLE
1294 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1296 #undef TARGET_MACHINE_DEPENDENT_REORG
1297 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1299 #undef TARGET_BUILD_BUILTIN_VA_LIST
1300 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1302 #undef TARGET_MD_ASM_CLOBBERS
1303 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1305 #undef TARGET_PROMOTE_PROTOTYPES
1306 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1307 #undef TARGET_STRUCT_VALUE_RTX
1308 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1309 #undef TARGET_SETUP_INCOMING_VARARGS
1310 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1311 #undef TARGET_MUST_PASS_IN_STACK
1312 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1313 #undef TARGET_PASS_BY_REFERENCE
1314 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1315 #undef TARGET_INTERNAL_ARG_POINTER
1316 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1317 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1318 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1320 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1321 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1323 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1324 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1326 #ifdef HAVE_AS_TLS
1327 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1328 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1329 #endif
1331 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1332 #undef TARGET_INSERT_ATTRIBUTES
1333 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1334 #endif
1336 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1337 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1339 #undef TARGET_STACK_PROTECT_FAIL
1340 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1342 #undef TARGET_FUNCTION_VALUE
1343 #define TARGET_FUNCTION_VALUE ix86_function_value
1345 struct gcc_target targetm = TARGET_INITIALIZER;
1348 /* The svr4 ABI for the i386 says that records and unions are returned
1349 in memory. */
1350 #ifndef DEFAULT_PCC_STRUCT_RETURN
1351 #define DEFAULT_PCC_STRUCT_RETURN 1
1352 #endif
1354 /* Implement TARGET_HANDLE_OPTION. */
1356 static bool
1357 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1359 switch (code)
1361 case OPT_m3dnow:
1362 if (!value)
1364 target_flags &= ~MASK_3DNOW_A;
1365 target_flags_explicit |= MASK_3DNOW_A;
1367 return true;
1369 case OPT_mmmx:
1370 if (!value)
1372 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1373 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1375 return true;
1377 case OPT_msse:
1378 if (!value)
1380 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1381 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1383 return true;
1385 case OPT_msse2:
1386 if (!value)
1388 target_flags &= ~MASK_SSE3;
1389 target_flags_explicit |= MASK_SSE3;
1391 return true;
1393 default:
1394 return true;
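/* Illustrative note (not part of the original source): disabling a base ISA
   this way also disables the extensions that depend on it and marks them as
   explicitly set, so the defaulting code in override_options will not
   re-enable them.  For example, "gcc -march=pentium4 -mno-sse" clears
   MASK_SSE2 and MASK_SSE3 as well, even though -march=pentium4 would
   otherwise turn SSE2 on.  */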
1398 /* Sometimes certain combinations of command options do not make
1399 sense on a particular target machine. You can define a macro
1400 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1401 defined, is executed once just after all the command options have
1402 been parsed.
1404 Don't use this macro to turn on various extra optimizations for
1405 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1407 void
1408 override_options (void)
1410 int i;
1411 int ix86_tune_defaulted = 0;
1413 /* Comes from final.c -- no real reason to change it. */
1414 #define MAX_CODE_ALIGN 16
1416 static struct ptt
1418 const struct processor_costs *cost; /* Processor costs */
1419 const int target_enable; /* Target flags to enable. */
1420 const int target_disable; /* Target flags to disable. */
1421 const int align_loop; /* Default alignments. */
1422 const int align_loop_max_skip;
1423 const int align_jump;
1424 const int align_jump_max_skip;
1425 const int align_func;
1427 const processor_target_table[PROCESSOR_max] =
1429 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1430 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1431 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1432 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1433 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1434 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1435 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1436 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1437 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1438 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1439 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1442 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1443 static struct pta
1445 const char *const name; /* processor name or nickname. */
1446 const enum processor_type processor;
1447 const enum pta_flags
1449 PTA_SSE = 1,
1450 PTA_SSE2 = 2,
1451 PTA_SSE3 = 4,
1452 PTA_MMX = 8,
1453 PTA_PREFETCH_SSE = 16,
1454 PTA_3DNOW = 32,
1455 PTA_3DNOW_A = 64,
1456 PTA_64BIT = 128
1457 } flags;
1459 const processor_alias_table[] =
1461 {"i386", PROCESSOR_I386, 0},
1462 {"i486", PROCESSOR_I486, 0},
1463 {"i586", PROCESSOR_PENTIUM, 0},
1464 {"pentium", PROCESSOR_PENTIUM, 0},
1465 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1466 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1467 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1468 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1469 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1470 {"i686", PROCESSOR_PENTIUMPRO, 0},
1471 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1472 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1473 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1474 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1475 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1476 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1477 | PTA_MMX | PTA_PREFETCH_SSE},
1478 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1479 | PTA_MMX | PTA_PREFETCH_SSE},
1480 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1481 | PTA_MMX | PTA_PREFETCH_SSE},
1482 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1483 | PTA_MMX | PTA_PREFETCH_SSE},
1484 {"k6", PROCESSOR_K6, PTA_MMX},
1485 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1486 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1487 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1488 | PTA_3DNOW_A},
1489 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1490 | PTA_3DNOW | PTA_3DNOW_A},
1491 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1492 | PTA_3DNOW_A | PTA_SSE},
1493 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1494 | PTA_3DNOW_A | PTA_SSE},
1495 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1496 | PTA_3DNOW_A | PTA_SSE},
1497 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1498 | PTA_SSE | PTA_SSE2 },
1499 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1500 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1501 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1502 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1503 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1504 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1505 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1506 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1507 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1508 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1511 int const pta_size = ARRAY_SIZE (processor_alias_table);
1513 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1514 SUBTARGET_OVERRIDE_OPTIONS;
1515 #endif
1517 /* Set the default values for switches whose default depends on TARGET_64BIT
1518 in case they weren't overwritten by command line options. */
1519 if (TARGET_64BIT)
1521 if (flag_omit_frame_pointer == 2)
1522 flag_omit_frame_pointer = 1;
1523 if (flag_asynchronous_unwind_tables == 2)
1524 flag_asynchronous_unwind_tables = 1;
1525 if (flag_pcc_struct_return == 2)
1526 flag_pcc_struct_return = 0;
1528 else
1530 if (flag_omit_frame_pointer == 2)
1531 flag_omit_frame_pointer = 0;
1532 if (flag_asynchronous_unwind_tables == 2)
1533 flag_asynchronous_unwind_tables = 0;
1534 if (flag_pcc_struct_return == 2)
1535 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1538 /* Need to check -mtune=generic first. */
1539 if (ix86_tune_string)
1541 if (!strcmp (ix86_tune_string, "generic")
1542 || !strcmp (ix86_tune_string, "i686"))
1544 if (TARGET_64BIT)
1545 ix86_tune_string = "generic64";
1546 else
1547 ix86_tune_string = "generic32";
1549 else if (!strncmp (ix86_tune_string, "generic", 7))
1550 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1552 else
1554 if (ix86_arch_string)
1555 ix86_tune_string = ix86_arch_string;
1556 if (!ix86_tune_string)
1558 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1559 ix86_tune_defaulted = 1;
1562 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1563 need to use a sensible tune option. */
1564 if (!strcmp (ix86_tune_string, "generic")
1565 || !strcmp (ix86_tune_string, "x86-64")
1566 || !strcmp (ix86_tune_string, "i686"))
1568 if (TARGET_64BIT)
1569 ix86_tune_string = "generic64";
1570 else
1571 ix86_tune_string = "generic32";
1574 if (!strcmp (ix86_tune_string, "x86-64"))
1575 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1576 "-mtune=generic instead as appropriate.");
1578 if (!ix86_arch_string)
1579 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1580 if (!strcmp (ix86_arch_string, "generic"))
1581 error ("generic CPU can be used only for -mtune= switch");
1582 if (!strncmp (ix86_arch_string, "generic", 7))
1583 error ("bad value (%s) for -march= switch", ix86_arch_string);
1585 if (ix86_cmodel_string != 0)
1587 if (!strcmp (ix86_cmodel_string, "small"))
1588 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1589 else if (!strcmp (ix86_cmodel_string, "medium"))
1590 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1591 else if (flag_pic)
1592 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1593 else if (!strcmp (ix86_cmodel_string, "32"))
1594 ix86_cmodel = CM_32;
1595 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1596 ix86_cmodel = CM_KERNEL;
1597 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1598 ix86_cmodel = CM_LARGE;
1599 else
1600 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1602 else
1604 ix86_cmodel = CM_32;
1605 if (TARGET_64BIT)
1606 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1608 if (ix86_asm_string != 0)
1610 if (! TARGET_MACHO
1611 && !strcmp (ix86_asm_string, "intel"))
1612 ix86_asm_dialect = ASM_INTEL;
1613 else if (!strcmp (ix86_asm_string, "att"))
1614 ix86_asm_dialect = ASM_ATT;
1615 else
1616 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1618 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1619 error ("code model %qs not supported in the %s bit mode",
1620 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1621 if (ix86_cmodel == CM_LARGE)
1622 sorry ("code model %<large%> not supported yet");
1623 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1624 sorry ("%i-bit mode not compiled in",
1625 (target_flags & MASK_64BIT) ? 64 : 32);
1627 for (i = 0; i < pta_size; i++)
1628 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1630 ix86_arch = processor_alias_table[i].processor;
1631 /* Default cpu tuning to the architecture. */
1632 ix86_tune = ix86_arch;
1633 if (processor_alias_table[i].flags & PTA_MMX
1634 && !(target_flags_explicit & MASK_MMX))
1635 target_flags |= MASK_MMX;
1636 if (processor_alias_table[i].flags & PTA_3DNOW
1637 && !(target_flags_explicit & MASK_3DNOW))
1638 target_flags |= MASK_3DNOW;
1639 if (processor_alias_table[i].flags & PTA_3DNOW_A
1640 && !(target_flags_explicit & MASK_3DNOW_A))
1641 target_flags |= MASK_3DNOW_A;
1642 if (processor_alias_table[i].flags & PTA_SSE
1643 && !(target_flags_explicit & MASK_SSE))
1644 target_flags |= MASK_SSE;
1645 if (processor_alias_table[i].flags & PTA_SSE2
1646 && !(target_flags_explicit & MASK_SSE2))
1647 target_flags |= MASK_SSE2;
1648 if (processor_alias_table[i].flags & PTA_SSE3
1649 && !(target_flags_explicit & MASK_SSE3))
1650 target_flags |= MASK_SSE3;
1651 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1652 x86_prefetch_sse = true;
1653 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1654 error ("CPU you selected does not support x86-64 "
1655 "instruction set");
1656 break;
1659 if (i == pta_size)
1660 error ("bad value (%s) for -march= switch", ix86_arch_string);
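/* Worked example (illustrative only): with -march=athlon-xp the loop above
   selects PROCESSOR_ATHLON and, unless the corresponding -m options were
   given explicitly, enables MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE
   from the PTA_* bits and sets x86_prefetch_sse; ix86_tune defaults to the
   same processor unless -mtune= overrides it below.  */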
1662 for (i = 0; i < pta_size; i++)
1663 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1665 ix86_tune = processor_alias_table[i].processor;
1666 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1668 if (ix86_tune_defaulted)
1670 ix86_tune_string = "x86-64";
1671 for (i = 0; i < pta_size; i++)
1672 if (! strcmp (ix86_tune_string,
1673 processor_alias_table[i].name))
1674 break;
1675 ix86_tune = processor_alias_table[i].processor;
1677 else
1678 error ("CPU you selected does not support x86-64 "
1679 "instruction set");
1681 /* Intel CPUs have always interpreted SSE prefetch instructions as
1682 NOPs; so, we can enable SSE prefetch instructions even when
1683 -mtune (rather than -march) points us to a processor that has them.
1684 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1685 higher processors. */
1686 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1687 x86_prefetch_sse = true;
1688 break;
1690 if (i == pta_size)
1691 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1693 if (optimize_size)
1694 ix86_cost = &size_cost;
1695 else
1696 ix86_cost = processor_target_table[ix86_tune].cost;
1697 target_flags |= processor_target_table[ix86_tune].target_enable;
1698 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1700 /* Arrange to set up i386_stack_locals for all functions. */
1701 init_machine_status = ix86_init_machine_status;
1703 /* Validate -mregparm= value. */
1704 if (ix86_regparm_string)
1706 i = atoi (ix86_regparm_string);
1707 if (i < 0 || i > REGPARM_MAX)
1708 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1709 else
1710 ix86_regparm = i;
1712 else
1713 if (TARGET_64BIT)
1714 ix86_regparm = REGPARM_MAX;
1716 /* If the user has provided any of the -malign-* options,
1717 warn and use that value only if -falign-* is not set.
1718 Remove this code in GCC 3.2 or later. */
1719 if (ix86_align_loops_string)
1721 warning (0, "-malign-loops is obsolete, use -falign-loops");
1722 if (align_loops == 0)
1724 i = atoi (ix86_align_loops_string);
1725 if (i < 0 || i > MAX_CODE_ALIGN)
1726 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1727 else
1728 align_loops = 1 << i;
1732 if (ix86_align_jumps_string)
1734 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1735 if (align_jumps == 0)
1737 i = atoi (ix86_align_jumps_string);
1738 if (i < 0 || i > MAX_CODE_ALIGN)
1739 	  error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1740 else
1741 align_jumps = 1 << i;
1745 if (ix86_align_funcs_string)
1747 warning (0, "-malign-functions is obsolete, use -falign-functions");
1748 if (align_functions == 0)
1750 i = atoi (ix86_align_funcs_string);
1751 if (i < 0 || i > MAX_CODE_ALIGN)
1752 	  error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1753 else
1754 align_functions = 1 << i;
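/* Illustrative note (not part of the original source): the -malign-* values
   are log2 exponents, so e.g. -malign-loops=4 yields align_loops = 1 << 4
   = 16, i.e. the same 16-byte alignment that -falign-loops=16 requests.  */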
1758 /* Default align_* from the processor table. */
1759 if (align_loops == 0)
1761 align_loops = processor_target_table[ix86_tune].align_loop;
1762 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1764 if (align_jumps == 0)
1766 align_jumps = processor_target_table[ix86_tune].align_jump;
1767 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1769 if (align_functions == 0)
1771 align_functions = processor_target_table[ix86_tune].align_func;
1774 /* Validate -mpreferred-stack-boundary= value, or provide default.
1775 The default of 128 bits is for Pentium III's SSE __m128, but we
1776 don't want additional code to keep the stack aligned when
1777 optimizing for code size. */
1778 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1779 ? 128 : 32);
1780 if (ix86_preferred_stack_boundary_string)
1782 i = atoi (ix86_preferred_stack_boundary_string);
1783 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1784 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1785 TARGET_64BIT ? 4 : 2);
1786 else
1787 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
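/* Illustrative note (not part of the original source): the option value is a
   log2 exponent, so -mpreferred-stack-boundary=4 means 2**4 = 16 bytes,
   i.e. (1 << 4) * BITS_PER_UNIT = 128 bits, the alignment that SSE __m128
   spills need.  */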
1790 /* Validate -mbranch-cost= value, or provide default. */
1791 ix86_branch_cost = ix86_cost->branch_cost;
1792 if (ix86_branch_cost_string)
1794 i = atoi (ix86_branch_cost_string);
1795 if (i < 0 || i > 5)
1796 error ("-mbranch-cost=%d is not between 0 and 5", i);
1797 else
1798 ix86_branch_cost = i;
1800 if (ix86_section_threshold_string)
1802 i = atoi (ix86_section_threshold_string);
1803 if (i < 0)
1804 error ("-mlarge-data-threshold=%d is negative", i);
1805 else
1806 ix86_section_threshold = i;
1809 if (ix86_tls_dialect_string)
1811 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1812 ix86_tls_dialect = TLS_DIALECT_GNU;
1813 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1814 ix86_tls_dialect = TLS_DIALECT_GNU2;
1815 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1816 ix86_tls_dialect = TLS_DIALECT_SUN;
1817 else
1818 error ("bad value (%s) for -mtls-dialect= switch",
1819 ix86_tls_dialect_string);
1822 /* Keep nonleaf frame pointers. */
1823 if (flag_omit_frame_pointer)
1824 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1825 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1826 flag_omit_frame_pointer = 1;
1828 /* If we're doing fast math, we don't care about comparison order
1829 wrt NaNs. This lets us use a shorter comparison sequence. */
1830 if (flag_unsafe_math_optimizations)
1831 target_flags &= ~MASK_IEEE_FP;
1833 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1834 since the insns won't need emulation. */
1835 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1836 target_flags &= ~MASK_NO_FANCY_MATH_387;
1838 /* Likewise, if the target doesn't have a 387, or we've specified
1839 software floating point, don't use 387 inline intrinsics. */
1840 if (!TARGET_80387)
1841 target_flags |= MASK_NO_FANCY_MATH_387;
1843 /* Turn on SSE2 builtins for -msse3. */
1844 if (TARGET_SSE3)
1845 target_flags |= MASK_SSE2;
1847 /* Turn on SSE builtins for -msse2. */
1848 if (TARGET_SSE2)
1849 target_flags |= MASK_SSE;
1851 /* Turn on MMX builtins for -msse. */
1852 if (TARGET_SSE)
1854 target_flags |= MASK_MMX & ~target_flags_explicit;
1855 x86_prefetch_sse = true;
1858 /* Turn on MMX builtins for 3Dnow. */
1859 if (TARGET_3DNOW)
1860 target_flags |= MASK_MMX;
1862 if (TARGET_64BIT)
1864 if (TARGET_ALIGN_DOUBLE)
1865 error ("-malign-double makes no sense in the 64bit mode");
1866 if (TARGET_RTD)
1867 error ("-mrtd calling convention not supported in the 64bit mode");
1869 /* Enable by default the SSE and MMX builtins. Do allow the user to
1870 explicitly disable any of these. In particular, disabling SSE and
1871 MMX for kernel code is extremely useful. */
1872 target_flags
1873 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1874 & ~target_flags_explicit);
1876 else
1878 /* i386 ABI does not specify red zone. It still makes sense to use it
1879 	 when the programmer takes care to keep the stack from being destroyed.  */
1880 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1881 target_flags |= MASK_NO_RED_ZONE;
1884 /* Accept -msseregparm only if at least SSE support is enabled. */
1885 if (TARGET_SSEREGPARM
1886 && ! TARGET_SSE)
1887 error ("-msseregparm used without SSE enabled");
1889 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1891 if (ix86_fpmath_string != 0)
1893 if (! strcmp (ix86_fpmath_string, "387"))
1894 ix86_fpmath = FPMATH_387;
1895 else if (! strcmp (ix86_fpmath_string, "sse"))
1897 if (!TARGET_SSE)
1899 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1900 ix86_fpmath = FPMATH_387;
1902 else
1903 ix86_fpmath = FPMATH_SSE;
1905 else if (! strcmp (ix86_fpmath_string, "387,sse")
1906 || ! strcmp (ix86_fpmath_string, "sse,387"))
1908 if (!TARGET_SSE)
1910 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1911 ix86_fpmath = FPMATH_387;
1913 else if (!TARGET_80387)
1915 warning (0, "387 instruction set disabled, using SSE arithmetics");
1916 ix86_fpmath = FPMATH_SSE;
1918 else
1919 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1921 else
1922 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1925 /* If the i387 is disabled, then do not return values in it. */
1926 if (!TARGET_80387)
1927 target_flags &= ~MASK_FLOAT_RETURNS;
1929 if ((x86_accumulate_outgoing_args & TUNEMASK)
1930 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1931 && !optimize_size)
1932 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1934 /* ??? Unwind info is not correct around the CFG unless either a frame
1935 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1936 unwind info generation to be aware of the CFG and propagating states
1937 around edges. */
1938 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1939 || flag_exceptions || flag_non_call_exceptions)
1940 && flag_omit_frame_pointer
1941 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1943 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1944 warning (0, "unwind tables currently require either a frame pointer "
1945 "or -maccumulate-outgoing-args for correctness");
1946 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1949 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1951 char *p;
1952 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1953 p = strchr (internal_label_prefix, 'X');
1954 internal_label_prefix_len = p - internal_label_prefix;
1955 *p = '\0';
1958 /* When the scheduling description is not available, disable the scheduler pass
1959 so it won't slow down compilation and make x87 code slower.  */
1960 if (!TARGET_SCHEDULE)
1961 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1964 /* Switch to the appropriate section for output of DECL.
1965 DECL is either a `VAR_DECL' node or a constant of some sort.
1966 RELOC indicates whether forming the initial value of DECL requires
1967 link-time relocations. */
1969 static section *
1970 x86_64_elf_select_section (tree decl, int reloc,
1971 unsigned HOST_WIDE_INT align)
1973 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1974 && ix86_in_large_data_p (decl))
1976 const char *sname = NULL;
1977 unsigned int flags = SECTION_WRITE;
1978 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1980 case SECCAT_DATA:
1981 sname = ".ldata";
1982 break;
1983 case SECCAT_DATA_REL:
1984 sname = ".ldata.rel";
1985 break;
1986 case SECCAT_DATA_REL_LOCAL:
1987 sname = ".ldata.rel.local";
1988 break;
1989 case SECCAT_DATA_REL_RO:
1990 sname = ".ldata.rel.ro";
1991 break;
1992 case SECCAT_DATA_REL_RO_LOCAL:
1993 sname = ".ldata.rel.ro.local";
1994 break;
1995 case SECCAT_BSS:
1996 sname = ".lbss";
1997 flags |= SECTION_BSS;
1998 break;
1999 case SECCAT_RODATA:
2000 case SECCAT_RODATA_MERGE_STR:
2001 case SECCAT_RODATA_MERGE_STR_INIT:
2002 case SECCAT_RODATA_MERGE_CONST:
2003 sname = ".lrodata";
2004 flags = 0;
2005 break;
2006 case SECCAT_SRODATA:
2007 case SECCAT_SDATA:
2008 case SECCAT_SBSS:
2009 gcc_unreachable ();
2010 case SECCAT_TEXT:
2011 case SECCAT_TDATA:
2012 case SECCAT_TBSS:
2013 	/* We don't split these for the medium model.  Place them into
2014 	   default sections and hope for the best.  */
2015 break;
2017 if (sname)
2019 /* We might get called with string constants, but get_named_section
2020 doesn't like them as they are not DECLs. Also, we need to set
2021 flags in that case. */
2022 if (!DECL_P (decl))
2023 return get_section (sname, flags, NULL);
2024 return get_named_section (decl, sname, reloc);
2027 return default_elf_select_section (decl, reloc, align);
2030 /* Build up a unique section name, expressed as a
2031 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2032 RELOC indicates whether the initial value of EXP requires
2033 link-time relocations. */
2035 static void
2036 x86_64_elf_unique_section (tree decl, int reloc)
2038 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2039 && ix86_in_large_data_p (decl))
2041 const char *prefix = NULL;
2042 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2043 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2045 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2047 case SECCAT_DATA:
2048 case SECCAT_DATA_REL:
2049 case SECCAT_DATA_REL_LOCAL:
2050 case SECCAT_DATA_REL_RO:
2051 case SECCAT_DATA_REL_RO_LOCAL:
2052 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2053 break;
2054 case SECCAT_BSS:
2055 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2056 break;
2057 case SECCAT_RODATA:
2058 case SECCAT_RODATA_MERGE_STR:
2059 case SECCAT_RODATA_MERGE_STR_INIT:
2060 case SECCAT_RODATA_MERGE_CONST:
2061 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2062 break;
2063 case SECCAT_SRODATA:
2064 case SECCAT_SDATA:
2065 case SECCAT_SBSS:
2066 gcc_unreachable ();
2067 case SECCAT_TEXT:
2068 case SECCAT_TDATA:
2069 case SECCAT_TBSS:
2070 	/* We don't split these for the medium model.  Place them into
2071 	   default sections and hope for the best.  */
2072 break;
2074 if (prefix)
2076 const char *name;
2077 size_t nlen, plen;
2078 char *string;
2079 plen = strlen (prefix);
2081 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2082 name = targetm.strip_name_encoding (name);
2083 nlen = strlen (name);
2085 string = alloca (nlen + plen + 1);
2086 memcpy (string, prefix, plen);
2087 memcpy (string + plen, name, nlen + 1);
2089 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2090 return;
2093 default_unique_section (decl, reloc);
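/* Example of the name built above (illustrative; "big_table" is a
   hypothetical variable placed in large data under the medium code model):
   the prefix ".ldata." is concatenated with the stripped assembler name,
   giving a section name such as ".ldata.big_table", or
   ".gnu.linkonce.ld.big_table" when COMDAT groups are unavailable.  */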
2096 #ifdef COMMON_ASM_OP
2097 /* This says how to output assembler code to declare an
2098 uninitialized external linkage data object.
2100 For medium model x86-64 we need to use the .largecomm directive for
2101 large objects. */
2102 void
2103 x86_elf_aligned_common (FILE *file,
2104 const char *name, unsigned HOST_WIDE_INT size,
2105 int align)
2107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2108 && size > (unsigned int)ix86_section_threshold)
2109 fprintf (file, ".largecomm\t");
2110 else
2111 fprintf (file, "%s", COMMON_ASM_OP);
2112 assemble_name (file, name);
2113 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2114 size, align / BITS_PER_UNIT);
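/* Sketch of the output produced above (illustrative; "buf" is a hypothetical
   4096-byte common object aligned to 256 bits that exceeds
   ix86_section_threshold under the medium code model):

	.largecomm	buf,4096,32

   Smaller objects fall back to the ordinary COMMON_ASM_OP form.  */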
2117 /* Utility function for targets to use in implementing
2118 ASM_OUTPUT_ALIGNED_BSS. */
2120 void
2121 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2122 const char *name, unsigned HOST_WIDE_INT size,
2123 int align)
2125 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2126 && size > (unsigned int)ix86_section_threshold)
2127 switch_to_section (get_named_section (decl, ".lbss", 0));
2128 else
2129 switch_to_section (bss_section);
2130 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2131 #ifdef ASM_DECLARE_OBJECT_NAME
2132 last_assemble_variable_decl = decl;
2133 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2134 #else
2135 /* The standard thing is just to output a label for the object.  */
2136 ASM_OUTPUT_LABEL (file, name);
2137 #endif /* ASM_DECLARE_OBJECT_NAME */
2138 ASM_OUTPUT_SKIP (file, size ? size : 1);
2140 #endif
2142 void
2143 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2145 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2146 make the problem with not enough registers even worse. */
2147 #ifdef INSN_SCHEDULING
2148 if (level > 1)
2149 flag_schedule_insns = 0;
2150 #endif
2152 if (TARGET_MACHO)
2153 /* The Darwin libraries never set errno, so we might as well
2154 avoid calling them when that's the only reason we would. */
2155 flag_errno_math = 0;
2157 /* The default values of these switches depend on TARGET_64BIT,
2158 which is not known at this moment.  Mark these values with 2 and
2159 let the user override them.  In case there is no command-line option
2160 specifying them, we will set the defaults in override_options. */
2161 if (optimize >= 1)
2162 flag_omit_frame_pointer = 2;
2163 flag_pcc_struct_return = 2;
2164 flag_asynchronous_unwind_tables = 2;
2165 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2166 SUBTARGET_OPTIMIZATION_OPTIONS;
2167 #endif
2170 /* Table of valid machine attributes. */
2171 const struct attribute_spec ix86_attribute_table[] =
2173 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2174 /* Stdcall attribute says callee is responsible for popping arguments
2175 if they are not variable. */
2176 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2177 /* Fastcall attribute says callee is responsible for popping arguments
2178 if they are not variable. */
2179 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2180 /* Cdecl attribute says the callee is a normal C declaration */
2181 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2182 /* Regparm attribute specifies how many integer arguments are to be
2183 passed in registers. */
2184 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2185 /* Sseregparm attribute says we are using x86_64 calling conventions
2186 for FP arguments. */
2187 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2188 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2189 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2190 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2191 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2192 #endif
2193 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2194 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2195 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2196 SUBTARGET_ATTRIBUTE_TABLE,
2197 #endif
2198 { NULL, 0, 0, false, false, false, NULL }
2201 /* Decide whether we can make a sibling call to a function. DECL is the
2202 declaration of the function being targeted by the call and EXP is the
2203 CALL_EXPR representing the call. */
2205 static bool
2206 ix86_function_ok_for_sibcall (tree decl, tree exp)
2208 tree func;
2209 rtx a, b;
2211 /* If we are generating position-independent code, we cannot sibcall
2212 optimize any indirect call, or a direct call to a global function,
2213 as the PLT requires %ebx be live. */
2214 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
2215 return false;
2217 if (decl)
2218 func = decl;
2219 else
2221 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2222 if (POINTER_TYPE_P (func))
2223 func = TREE_TYPE (func);
2226 /* Check that the return value locations are the same. Like
2227 if we are returning floats on the 80387 register stack, we cannot
2228 make a sibcall from a function that doesn't return a float to a
2229 function that does or, conversely, from a function that does return
2230 a float to a function that doesn't; the necessary stack adjustment
2231 would not be executed. This is also the place we notice
2232 differences in the return value ABI. Note that it is ok for one
2233 of the functions to have void return type as long as the return
2234 value of the other is passed in a register. */
2235 a = ix86_function_value (TREE_TYPE (exp), func, false);
2236 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2237 cfun->decl, false);
2238 if (STACK_REG_P (a) || STACK_REG_P (b))
2240 if (!rtx_equal_p (a, b))
2241 return false;
2243 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2245 else if (!rtx_equal_p (a, b))
2246 return false;
2248 /* If this call is indirect, we'll need to be able to use a call-clobbered
2249 register for the address of the target function. Make sure that all
2250 such registers are not used for passing parameters. */
2251 if (!decl && !TARGET_64BIT)
2253 tree type;
2255 /* We're looking at the CALL_EXPR, we need the type of the function. */
2256 type = TREE_OPERAND (exp, 0); /* pointer expression */
2257 type = TREE_TYPE (type); /* pointer type */
2258 type = TREE_TYPE (type); /* function type */
2260 if (ix86_function_regparm (type, NULL) >= 3)
2262 /* ??? Need to count the actual number of registers to be used,
2263 not the possible number of registers. Fix later. */
2264 return false;
2268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2269 /* Dllimport'd functions are also called indirectly. */
2270 if (decl && DECL_DLLIMPORT_P (decl)
2271 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2272 return false;
2273 #endif
2275 /* If we forced aligned the stack, then sibcalling would unalign the
2276 stack, which may break the called function. */
2277 if (cfun->machine->force_align_arg_pointer)
2278 return false;
2280 /* Otherwise okay. That also includes certain types of indirect calls. */
2281 return true;
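/* Illustrative example (not part of the original source) of the PIC
   restriction checked above; "helper" and "caller" are hypothetical,
   compiled as 32-bit code with -fpic:  */
#if 0
extern int helper (int);

int
caller (int x)
{
  /* A direct call to a global function goes through the PLT, which needs
     %ebx live, so this tail call is not turned into a sibcall.  */
  return helper (x);
}
#endif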
2284 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2285 calling convention attributes;
2286 arguments as in struct attribute_spec.handler. */
2288 static tree
2289 ix86_handle_cconv_attribute (tree *node, tree name,
2290 tree args,
2291 int flags ATTRIBUTE_UNUSED,
2292 bool *no_add_attrs)
2294 if (TREE_CODE (*node) != FUNCTION_TYPE
2295 && TREE_CODE (*node) != METHOD_TYPE
2296 && TREE_CODE (*node) != FIELD_DECL
2297 && TREE_CODE (*node) != TYPE_DECL)
2299 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2300 IDENTIFIER_POINTER (name));
2301 *no_add_attrs = true;
2302 return NULL_TREE;
2305 /* Can combine regparm with all attributes but fastcall. */
2306 if (is_attribute_p ("regparm", name))
2308 tree cst;
2310 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2312 error ("fastcall and regparm attributes are not compatible");
2315 cst = TREE_VALUE (args);
2316 if (TREE_CODE (cst) != INTEGER_CST)
2318 warning (OPT_Wattributes,
2319 "%qs attribute requires an integer constant argument",
2320 IDENTIFIER_POINTER (name));
2321 *no_add_attrs = true;
2323 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2325 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2326 IDENTIFIER_POINTER (name), REGPARM_MAX);
2327 *no_add_attrs = true;
2330 return NULL_TREE;
2333 if (TARGET_64BIT)
2335 warning (OPT_Wattributes, "%qs attribute ignored",
2336 IDENTIFIER_POINTER (name));
2337 *no_add_attrs = true;
2338 return NULL_TREE;
2341 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2342 if (is_attribute_p ("fastcall", name))
2344 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2346 error ("fastcall and cdecl attributes are not compatible");
2348 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2350 error ("fastcall and stdcall attributes are not compatible");
2352 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2354 error ("fastcall and regparm attributes are not compatible");
2358 /* Can combine stdcall with fastcall (redundant), regparm and
2359 sseregparm. */
2360 else if (is_attribute_p ("stdcall", name))
2362 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2364 error ("stdcall and cdecl attributes are not compatible");
2366 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2368 error ("stdcall and fastcall attributes are not compatible");
2372 /* Can combine cdecl with regparm and sseregparm. */
2373 else if (is_attribute_p ("cdecl", name))
2375 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2377 error ("stdcall and cdecl attributes are not compatible");
2379 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2381 error ("fastcall and cdecl attributes are not compatible");
2385 /* Can combine sseregparm with all attributes. */
2387 return NULL_TREE;
2390 /* Return 0 if the attributes for two types are incompatible, 1 if they
2391 are compatible, and 2 if they are nearly compatible (which causes a
2392 warning to be generated). */
2394 static int
2395 ix86_comp_type_attributes (tree type1, tree type2)
2397 /* Check for mismatch of non-default calling convention. */
2398 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2400 if (TREE_CODE (type1) != FUNCTION_TYPE)
2401 return 1;
2403 /* Check for mismatched fastcall/regparm types. */
2404 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2405 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2406 || (ix86_function_regparm (type1, NULL)
2407 != ix86_function_regparm (type2, NULL)))
2408 return 0;
2410 /* Check for mismatched sseregparm types. */
2411 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2412 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2413 return 0;
2415 /* Check for mismatched return types (cdecl vs stdcall). */
2416 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2417 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2418 return 0;
2420 return 1;
2423 /* Return the regparm value for a function with the indicated TYPE and DECL.
2424 DECL may be NULL when calling function indirectly
2425 or considering a libcall. */
2427 static int
2428 ix86_function_regparm (tree type, tree decl)
2430 tree attr;
2431 int regparm = ix86_regparm;
2432 bool user_convention = false;
2434 if (!TARGET_64BIT)
2436 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2437 if (attr)
2439 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2440 user_convention = true;
2443 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2445 regparm = 2;
2446 user_convention = true;
2449 /* Use register calling convention for local functions when possible. */
2450 if (!TARGET_64BIT && !user_convention && decl
2451 && flag_unit_at_a_time && !profile_flag)
2453 struct cgraph_local_info *i = cgraph_local_info (decl);
2454 if (i && i->local)
2456 int local_regparm, globals = 0, regno;
2458 /* Make sure no regparm register is taken by a global register
2459 variable. */
2460 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2461 if (global_regs[local_regparm])
2462 break;
2463 /* We can't use regparm(3) for nested functions as these use
2464 static chain pointer in third argument. */
2465 if (local_regparm == 3
2466 && decl_function_context (decl)
2467 && !DECL_NO_STATIC_CHAIN (decl))
2468 local_regparm = 2;
2469 	  /* Each global register variable increases register pressure,
2470 	     so the more global register variables there are, the less useful
2471 	     the regparm optimization is, unless the user requests it explicitly.  */
2472 for (regno = 0; regno < 6; regno++)
2473 if (global_regs[regno])
2474 globals++;
2475 local_regparm
2476 = globals < local_regparm ? local_regparm - globals : 0;
2478 if (local_regparm > regparm)
2479 regparm = local_regparm;
2483 return regparm;
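/* Illustrative user-level example (not part of the original source) of the
   convention computed above; "add3" is hypothetical, for a 32-bit target:  */
#if 0
int __attribute__ ((regparm (3)))
add3 (int a, int b, int c)
{
  /* With regparm(3) the arguments arrive in %eax, %edx and %ecx instead of
     on the stack; a local (file-static) function may receive the same
     treatment automatically when -funit-at-a-time is in effect.  */
  return a + b + c;
}
#endif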
2486 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2487 in SSE registers for a function with the indicated TYPE and DECL.
2488 DECL may be NULL when calling function indirectly
2489 or considering a libcall. Otherwise return 0. */
2491 static int
2492 ix86_function_sseregparm (tree type, tree decl)
2494 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2495 by the sseregparm attribute. */
2496 if (TARGET_SSEREGPARM
2497 || (type
2498 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2500 if (!TARGET_SSE)
2502 if (decl)
2503 error ("Calling %qD with attribute sseregparm without "
2504 "SSE/SSE2 enabled", decl);
2505 else
2506 error ("Calling %qT with attribute sseregparm without "
2507 "SSE/SSE2 enabled", type);
2508 return 0;
2511 return 2;
2514 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2515 in SSE registers even for 32-bit mode and not just 3, but up to
2516 8 SSE arguments in registers. */
2517 if (!TARGET_64BIT && decl
2518 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2520 struct cgraph_local_info *i = cgraph_local_info (decl);
2521 if (i && i->local)
2522 return TARGET_SSE2 ? 2 : 1;
2525 return 0;
2528 /* Return true if EAX is live at the start of the function. Used by
2529 ix86_expand_prologue to determine if we need special help before
2530 calling allocate_stack_worker. */
2532 static bool
2533 ix86_eax_live_at_start_p (void)
2535 /* Cheat. Don't bother working forward from ix86_function_regparm
2536 to the function type to whether an actual argument is located in
2537 eax. Instead just look at cfg info, which is still close enough
2538 to correct at this point. This gives false positives for broken
2539 functions that might use uninitialized data that happens to be
2540 allocated in eax, but who cares? */
2541 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2544 /* Value is the number of bytes of arguments automatically
2545 popped when returning from a subroutine call.
2546 FUNDECL is the declaration node of the function (as a tree),
2547 FUNTYPE is the data type of the function (as a tree),
2548 or for a library call it is an identifier node for the subroutine name.
2549 SIZE is the number of bytes of arguments passed on the stack.
2551 On the 80386, the RTD insn may be used to pop them if the number
2552 of args is fixed, but if the number is variable then the caller
2553 must pop them all. RTD can't be used for library calls now
2554 because the library is compiled with the Unix compiler.
2555 Use of RTD is a selectable option, since it is incompatible with
2556 standard Unix calling sequences. If the option is not selected,
2557 the caller must always pop the args.
2559 The attribute stdcall is equivalent to RTD on a per module basis. */
2562 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2564 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2566 /* Cdecl functions override -mrtd, and never pop the stack. */
2567 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2569 /* Stdcall and fastcall functions will pop the stack if not
2570 variable args. */
2571 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2572 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2573 rtd = 1;
2575 if (rtd
2576 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2577 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2578 == void_type_node)))
2579 return size;
2582 /* Lose any fake structure return argument if it is passed on the stack. */
2583 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2584 && !TARGET_64BIT
2585 && !KEEP_AGGREGATE_RETURN_POINTER)
2587 int nregs = ix86_function_regparm (funtype, fundecl);
2589 if (!nregs)
2590 return GET_MODE_SIZE (Pmode);
2593 return 0;
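/* Illustrative user-level example (not part of the original source);
   "sum2" is hypothetical, for a 32-bit target:  */
#if 0
int __attribute__ ((stdcall)) sum2 (int a, int b);

/* For sum2, ix86_return_pops_args returns 8: the callee pops both integer
   arguments (e.g. with "ret $8"), whereas a plain cdecl function returns 0
   here and leaves the stack cleanup to the caller.  */
#endif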
2596 /* Argument support functions. */
2598 /* Return true when REGNO may be used to pass function parameters.  */
2599 bool
2600 ix86_function_arg_regno_p (int regno)
2602 int i;
2603 if (!TARGET_64BIT)
2604 return (regno < REGPARM_MAX
2605 || (TARGET_MMX && MMX_REGNO_P (regno)
2606 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2607 || (TARGET_SSE && SSE_REGNO_P (regno)
2608 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2610 if (TARGET_SSE && SSE_REGNO_P (regno)
2611 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2612 return true;
2613 /* RAX is used as hidden argument to va_arg functions. */
2614 if (!regno)
2615 return true;
2616 for (i = 0; i < REGPARM_MAX; i++)
2617 if (regno == x86_64_int_parameter_registers[i])
2618 return true;
2619 return false;
2622 /* Return true if we do not know how to pass TYPE solely in registers.  */
2624 static bool
2625 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2627 if (must_pass_in_stack_var_size_or_pad (mode, type))
2628 return true;
2630 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2631 The layout_type routine is crafty and tries to trick us into passing
2632 currently unsupported vector types on the stack by using TImode. */
2633 return (!TARGET_64BIT && mode == TImode
2634 && type && TREE_CODE (type) != VECTOR_TYPE);
2637 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2638 for a call to a function whose data type is FNTYPE.
2639 For a library call, FNTYPE is 0. */
2641 void
2642 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2643 tree fntype, /* tree ptr for function decl */
2644 rtx libname, /* SYMBOL_REF of library name or 0 */
2645 tree fndecl)
2647 static CUMULATIVE_ARGS zero_cum;
2648 tree param, next_param;
2650 if (TARGET_DEBUG_ARG)
2652 fprintf (stderr, "\ninit_cumulative_args (");
2653 if (fntype)
2654 fprintf (stderr, "fntype code = %s, ret code = %s",
2655 tree_code_name[(int) TREE_CODE (fntype)],
2656 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2657 else
2658 fprintf (stderr, "no fntype");
2660 if (libname)
2661 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2664 *cum = zero_cum;
2666 /* Set up the number of registers to use for passing arguments. */
2667 cum->nregs = ix86_regparm;
2668 if (TARGET_SSE)
2669 cum->sse_nregs = SSE_REGPARM_MAX;
2670 if (TARGET_MMX)
2671 cum->mmx_nregs = MMX_REGPARM_MAX;
2672 cum->warn_sse = true;
2673 cum->warn_mmx = true;
2674 cum->maybe_vaarg = false;
2676 /* Use ecx and edx registers if function has fastcall attribute,
2677 else look for regparm information. */
2678 if (fntype && !TARGET_64BIT)
2680 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2682 cum->nregs = 2;
2683 cum->fastcall = 1;
2685 else
2686 cum->nregs = ix86_function_regparm (fntype, fndecl);
2689 /* Set up the number of SSE registers used for passing SFmode
2690 and DFmode arguments. Warn for mismatching ABI. */
2691 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2693 /* Determine if this function has variable arguments. This is
2694 indicated by the last argument being 'void_type_node' if there
2695 are no variable arguments. If there are variable arguments, then
2696 we won't pass anything in registers in 32-bit mode. */
2698 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2700 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2701 param != 0; param = next_param)
2703 next_param = TREE_CHAIN (param);
2704 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2706 if (!TARGET_64BIT)
2708 cum->nregs = 0;
2709 cum->sse_nregs = 0;
2710 cum->mmx_nregs = 0;
2711 cum->warn_sse = 0;
2712 cum->warn_mmx = 0;
2713 cum->fastcall = 0;
2714 cum->float_in_sse = 0;
2716 cum->maybe_vaarg = true;
2720 if ((!fntype && !libname)
2721 || (fntype && !TYPE_ARG_TYPES (fntype)))
2722 cum->maybe_vaarg = true;
2724 if (TARGET_DEBUG_ARG)
2725 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2727 return;
2730 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2731 But in the case of vector types, it is some vector mode.
2733 When we have only some of our vector isa extensions enabled, then there
2734 are some modes for which vector_mode_supported_p is false. For these
2735 modes, the generic vector support in gcc will choose some non-vector mode
2736 in order to implement the type. By computing the natural mode, we'll
2737 select the proper ABI location for the operand and not depend on whatever
2738 the middle-end decides to do with these vector types. */
2740 static enum machine_mode
2741 type_natural_mode (tree type)
2743 enum machine_mode mode = TYPE_MODE (type);
2745 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2747 HOST_WIDE_INT size = int_size_in_bytes (type);
2748 if ((size == 8 || size == 16)
2749 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2750 && TYPE_VECTOR_SUBPARTS (type) > 1)
2752 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2754 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2755 mode = MIN_MODE_VECTOR_FLOAT;
2756 else
2757 mode = MIN_MODE_VECTOR_INT;
2759 /* Get the mode which has this inner mode and number of units. */
2760 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2761 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2762 && GET_MODE_INNER (mode) == innermode)
2763 return mode;
2765 gcc_unreachable ();
2769 return mode;
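/* Illustrative example (not part of the original source); "v4sf" is a
   hypothetical user typedef:  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

/* If V4SFmode is not supported by the enabled ISA and the type therefore
   ends up with a non-vector TYPE_MODE, type_natural_mode still walks the
   MODE_VECTOR_FLOAT modes and returns V4SFmode, so the ABI location of a
   v4sf argument does not depend on which extensions happen to be on.  */
#endif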
2772 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2773 this may not agree with the mode that the type system has chosen for the
2774 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2775 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2777 static rtx
2778 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2779 unsigned int regno)
2781 rtx tmp;
2783 if (orig_mode != BLKmode)
2784 tmp = gen_rtx_REG (orig_mode, regno);
2785 else
2787 tmp = gen_rtx_REG (mode, regno);
2788 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2789 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2792 return tmp;
2795 /* x86-64 register passing implementation.  See the x86-64 PS ABI for details.  The
2796 goal of this code is to classify each eightbyte of an incoming argument by register
2797 class and assign registers accordingly. */
2799 /* Return the union class of CLASS1 and CLASS2.
2800 See the x86-64 PS ABI for details. */
2802 static enum x86_64_reg_class
2803 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2805 /* Rule #1: If both classes are equal, this is the resulting class. */
2806 if (class1 == class2)
2807 return class1;
2809 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2810 the other class. */
2811 if (class1 == X86_64_NO_CLASS)
2812 return class2;
2813 if (class2 == X86_64_NO_CLASS)
2814 return class1;
2816 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2817 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2818 return X86_64_MEMORY_CLASS;
2820 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2821 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2822 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2823 return X86_64_INTEGERSI_CLASS;
2824 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2825 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2826 return X86_64_INTEGER_CLASS;
2828 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2829 MEMORY is used. */
2830 if (class1 == X86_64_X87_CLASS
2831 || class1 == X86_64_X87UP_CLASS
2832 || class1 == X86_64_COMPLEX_X87_CLASS
2833 || class2 == X86_64_X87_CLASS
2834 || class2 == X86_64_X87UP_CLASS
2835 || class2 == X86_64_COMPLEX_X87_CLASS)
2836 return X86_64_MEMORY_CLASS;
2838 /* Rule #6: Otherwise class SSE is used. */
2839 return X86_64_SSE_CLASS;
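/* Worked example (illustrative only) of the rules above: in the first
   eightbyte of "struct s { int i; float f; }" the int word is classified
   X86_64_INTEGERSI_CLASS and the float word X86_64_SSE_CLASS (it is not at
   a 64-bit boundary, so not SSESF); rule #4 merges them to
   X86_64_INTEGER_CLASS, so the whole eightbyte is passed in an integer
   register.  */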
2842 /* Classify the argument of type TYPE and mode MODE.
2843 CLASSES will be filled by the register class used to pass each word
2844 of the operand. The number of words is returned. In case the parameter
2845 should be passed in memory, 0 is returned. As a special case for zero
2846 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2848 BIT_OFFSET is used internally for handling records and specifies the
2849 offset in bits modulo 256 to avoid overflow cases.
2851 See the x86-64 PS ABI for details.
2854 static int
2855 classify_argument (enum machine_mode mode, tree type,
2856 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2858 HOST_WIDE_INT bytes =
2859 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2860 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2862 /* Variable sized entities are always passed/returned in memory. */
2863 if (bytes < 0)
2864 return 0;
2866 if (mode != VOIDmode
2867 && targetm.calls.must_pass_in_stack (mode, type))
2868 return 0;
2870 if (type && AGGREGATE_TYPE_P (type))
2872 int i;
2873 tree field;
2874 enum x86_64_reg_class subclasses[MAX_CLASSES];
2876 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2877 if (bytes > 16)
2878 return 0;
2880 for (i = 0; i < words; i++)
2881 classes[i] = X86_64_NO_CLASS;
2883 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2884 	 signal the memory class, so handle it as a special case.  */
2885 if (!words)
2887 classes[0] = X86_64_NO_CLASS;
2888 return 1;
2891 /* Classify each field of record and merge classes. */
2892 switch (TREE_CODE (type))
2894 case RECORD_TYPE:
2895 /* For classes first merge in the field of the subclasses. */
2896 if (TYPE_BINFO (type))
2898 tree binfo, base_binfo;
2899 int basenum;
2901 for (binfo = TYPE_BINFO (type), basenum = 0;
2902 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2904 int num;
2905 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2906 tree type = BINFO_TYPE (base_binfo);
2908 num = classify_argument (TYPE_MODE (type),
2909 type, subclasses,
2910 (offset + bit_offset) % 256);
2911 if (!num)
2912 return 0;
2913 for (i = 0; i < num; i++)
2915 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2916 classes[i + pos] =
2917 merge_classes (subclasses[i], classes[i + pos]);
2921 /* And now merge the fields of structure. */
2922 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2924 if (TREE_CODE (field) == FIELD_DECL)
2926 int num;
2928 /* Bitfields are always classified as integer. Handle them
2929 early, since later code would consider them to be
2930 misaligned integers. */
2931 if (DECL_BIT_FIELD (field))
2933 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2934 i < ((int_bit_position (field) + (bit_offset % 64))
2935 + tree_low_cst (DECL_SIZE (field), 0)
2936 + 63) / 8 / 8; i++)
2937 classes[i] =
2938 merge_classes (X86_64_INTEGER_CLASS,
2939 classes[i]);
2941 else
2943 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2944 TREE_TYPE (field), subclasses,
2945 (int_bit_position (field)
2946 + bit_offset) % 256);
2947 if (!num)
2948 return 0;
2949 for (i = 0; i < num; i++)
2951 int pos =
2952 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2953 classes[i + pos] =
2954 merge_classes (subclasses[i], classes[i + pos]);
2959 break;
2961 case ARRAY_TYPE:
2962 /* Arrays are handled as small records. */
2964 int num;
2965 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2966 TREE_TYPE (type), subclasses, bit_offset);
2967 if (!num)
2968 return 0;
2970 /* The partial classes are now full classes. */
2971 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2972 subclasses[0] = X86_64_SSE_CLASS;
2973 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2974 subclasses[0] = X86_64_INTEGER_CLASS;
2976 for (i = 0; i < words; i++)
2977 classes[i] = subclasses[i % num];
2979 break;
2981 case UNION_TYPE:
2982 case QUAL_UNION_TYPE:
2983 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2986 /* Unions are not derived. */
2987 gcc_assert (!TYPE_BINFO (type)
2988 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2989 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2991 if (TREE_CODE (field) == FIELD_DECL)
2993 int num;
2994 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2995 TREE_TYPE (field), subclasses,
2996 bit_offset);
2997 if (!num)
2998 return 0;
2999 for (i = 0; i < num; i++)
3000 classes[i] = merge_classes (subclasses[i], classes[i]);
3003 break;
3005 default:
3006 gcc_unreachable ();
3009 /* Final merger cleanup. */
3010 for (i = 0; i < words; i++)
3012 /* If one class is MEMORY, everything should be passed in
3013 memory. */
3014 if (classes[i] == X86_64_MEMORY_CLASS)
3015 return 0;
3017 /* The X86_64_SSEUP_CLASS should be always preceded by
3018 X86_64_SSE_CLASS. */
3019 if (classes[i] == X86_64_SSEUP_CLASS
3020 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3021 classes[i] = X86_64_SSE_CLASS;
3023 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3024 if (classes[i] == X86_64_X87UP_CLASS
3025 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3026 classes[i] = X86_64_SSE_CLASS;
3028 return words;
3031 /* Compute alignment needed. We align all types to natural boundaries with
3032 exception of XFmode that is aligned to 64bits. */
3033 if (mode != VOIDmode && mode != BLKmode)
3035 int mode_alignment = GET_MODE_BITSIZE (mode);
3037 if (mode == XFmode)
3038 mode_alignment = 128;
3039 else if (mode == XCmode)
3040 mode_alignment = 256;
3041 if (COMPLEX_MODE_P (mode))
3042 mode_alignment /= 2;
3043 /* Misaligned fields are always returned in memory. */
3044 if (bit_offset % mode_alignment)
3045 return 0;
3048 /* For V1xx modes, just use the base mode.  */
3049 if (VECTOR_MODE_P (mode)
3050 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3051 mode = GET_MODE_INNER (mode);
3053 /* Classification of atomic types. */
3054 switch (mode)
3056 case DImode:
3057 case SImode:
3058 case HImode:
3059 case QImode:
3060 case CSImode:
3061 case CHImode:
3062 case CQImode:
3063 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3064 classes[0] = X86_64_INTEGERSI_CLASS;
3065 else
3066 classes[0] = X86_64_INTEGER_CLASS;
3067 return 1;
3068 case CDImode:
3069 case TImode:
3070 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3071 return 2;
3072 case CTImode:
3073 return 0;
3074 case SFmode:
3075 if (!(bit_offset % 64))
3076 classes[0] = X86_64_SSESF_CLASS;
3077 else
3078 classes[0] = X86_64_SSE_CLASS;
3079 return 1;
3080 case DFmode:
3081 classes[0] = X86_64_SSEDF_CLASS;
3082 return 1;
3083 case XFmode:
3084 classes[0] = X86_64_X87_CLASS;
3085 classes[1] = X86_64_X87UP_CLASS;
3086 return 2;
3087 case TFmode:
3088 classes[0] = X86_64_SSE_CLASS;
3089 classes[1] = X86_64_SSEUP_CLASS;
3090 return 2;
3091 case SCmode:
3092 classes[0] = X86_64_SSE_CLASS;
3093 return 1;
3094 case DCmode:
3095 classes[0] = X86_64_SSEDF_CLASS;
3096 classes[1] = X86_64_SSEDF_CLASS;
3097 return 2;
3098 case XCmode:
3099 classes[0] = X86_64_COMPLEX_X87_CLASS;
3100 return 1;
3101 case TCmode:
3102 /* This mode is larger than 16 bytes.  */
3103 return 0;
3104 case V4SFmode:
3105 case V4SImode:
3106 case V16QImode:
3107 case V8HImode:
3108 case V2DFmode:
3109 case V2DImode:
3110 classes[0] = X86_64_SSE_CLASS;
3111 classes[1] = X86_64_SSEUP_CLASS;
3112 return 2;
3113 case V2SFmode:
3114 case V2SImode:
3115 case V4HImode:
3116 case V8QImode:
3117 classes[0] = X86_64_SSE_CLASS;
3118 return 1;
3119 case BLKmode:
3120 case VOIDmode:
3121 return 0;
3122 default:
3123 gcc_assert (VECTOR_MODE_P (mode));
3125 if (bytes > 16)
3126 return 0;
3128 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3130 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3131 classes[0] = X86_64_INTEGERSI_CLASS;
3132 else
3133 classes[0] = X86_64_INTEGER_CLASS;
3134 classes[1] = X86_64_INTEGER_CLASS;
3135 return 1 + (bytes > 8);
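/* Worked example (illustrative only): for "struct s { double d; int i; }",
   16 bytes and thus two eightbytes, the first eightbyte holds the double
   and is classified X86_64_SSEDF_CLASS while the second holds the int and
   is classified X86_64_INTEGER_CLASS; classify_argument returns 2 and
   construct_container later assigns one SSE and one integer register.  */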
3139 /* Examine the argument and set the number of registers required in each
3140 class.  Return 0 iff the parameter should be passed in memory.  */
3141 static int
3142 examine_argument (enum machine_mode mode, tree type, int in_return,
3143 int *int_nregs, int *sse_nregs)
3145 enum x86_64_reg_class class[MAX_CLASSES];
3146 int n = classify_argument (mode, type, class, 0);
3148 *int_nregs = 0;
3149 *sse_nregs = 0;
3150 if (!n)
3151 return 0;
3152 for (n--; n >= 0; n--)
3153 switch (class[n])
3155 case X86_64_INTEGER_CLASS:
3156 case X86_64_INTEGERSI_CLASS:
3157 (*int_nregs)++;
3158 break;
3159 case X86_64_SSE_CLASS:
3160 case X86_64_SSESF_CLASS:
3161 case X86_64_SSEDF_CLASS:
3162 (*sse_nregs)++;
3163 break;
3164 case X86_64_NO_CLASS:
3165 case X86_64_SSEUP_CLASS:
3166 break;
3167 case X86_64_X87_CLASS:
3168 case X86_64_X87UP_CLASS:
3169 if (!in_return)
3170 return 0;
3171 break;
3172 case X86_64_COMPLEX_X87_CLASS:
3173 return in_return ? 2 : 0;
3174 case X86_64_MEMORY_CLASS:
3175 gcc_unreachable ();
3177 return 1;
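/* Illustrative sketch (not part of the original source): for an argument of
   type struct { double x; long y; }, classify_argument yields
   { SSEDF, INTEGER }, so this function sets *sse_nregs = 1 and
   *int_nregs = 1 and returns nonzero.  An XFmode value classifies as
   { X87, X87UP } and is rejected (returns 0) unless IN_RETURN is set.  */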
3180 /* Construct container for the argument used by GCC interface. See
3181 FUNCTION_ARG for the detailed description. */
3183 static rtx
3184 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3185 tree type, int in_return, int nintregs, int nsseregs,
3186 const int *intreg, int sse_regno)
3188 enum machine_mode tmpmode;
3189 int bytes =
3190 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3191 enum x86_64_reg_class class[MAX_CLASSES];
3192 int n;
3193 int i;
3194 int nexps = 0;
3195 int needed_sseregs, needed_intregs;
3196 rtx exp[MAX_CLASSES];
3197 rtx ret;
3199 n = classify_argument (mode, type, class, 0);
3200 if (TARGET_DEBUG_ARG)
3202 if (!n)
3203 fprintf (stderr, "Memory class\n");
3204 else
3206 fprintf (stderr, "Classes:");
3207 for (i = 0; i < n; i++)
3209 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3211 fprintf (stderr, "\n");
3214 if (!n)
3215 return NULL;
3216 if (!examine_argument (mode, type, in_return, &needed_intregs,
3217 &needed_sseregs))
3218 return NULL;
3219 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3220 return NULL;
3222 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3223 some less clueful developer tries to use floating-point anyway. */
3224 if (needed_sseregs && !TARGET_SSE)
3226 static bool issued_error;
3227 if (!issued_error)
3229 issued_error = true;
3230 if (in_return)
3231 error ("SSE register return with SSE disabled");
3232 else
3233 error ("SSE register argument with SSE disabled");
3235 return NULL;
3238 /* First construct the simple cases. Avoid SCmode, since we want to use
3239 a single register to pass this type. */
3240 if (n == 1 && mode != SCmode)
3241 switch (class[0])
3243 case X86_64_INTEGER_CLASS:
3244 case X86_64_INTEGERSI_CLASS:
3245 return gen_rtx_REG (mode, intreg[0]);
3246 case X86_64_SSE_CLASS:
3247 case X86_64_SSESF_CLASS:
3248 case X86_64_SSEDF_CLASS:
3249 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3250 case X86_64_X87_CLASS:
3251 case X86_64_COMPLEX_X87_CLASS:
3252 return gen_rtx_REG (mode, FIRST_STACK_REG);
3253 case X86_64_NO_CLASS:
3254 /* Zero sized array, struct or class. */
3255 return NULL;
3256 default:
3257 gcc_unreachable ();
3259 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3260 && mode != BLKmode)
3261 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3262 if (n == 2
3263 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3264 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3265 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3266 && class[1] == X86_64_INTEGER_CLASS
3267 && (mode == CDImode || mode == TImode || mode == TFmode)
3268 && intreg[0] + 1 == intreg[1])
3269 return gen_rtx_REG (mode, intreg[0]);
3271 /* Otherwise figure out the entries of the PARALLEL. */
3272 for (i = 0; i < n; i++)
3274 switch (class[i])
3276 case X86_64_NO_CLASS:
3277 break;
3278 case X86_64_INTEGER_CLASS:
3279 case X86_64_INTEGERSI_CLASS:
3280 /* Merge TImodes on aligned occasions here too. */
3281 if (i * 8 + 8 > bytes)
3282 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3283 else if (class[i] == X86_64_INTEGERSI_CLASS)
3284 tmpmode = SImode;
3285 else
3286 tmpmode = DImode;
3287 /* We've requested a size (such as 24 bits) we don't have a mode for. Use DImode. */
3288 if (tmpmode == BLKmode)
3289 tmpmode = DImode;
3290 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3291 gen_rtx_REG (tmpmode, *intreg),
3292 GEN_INT (i*8));
3293 intreg++;
3294 break;
3295 case X86_64_SSESF_CLASS:
3296 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3297 gen_rtx_REG (SFmode,
3298 SSE_REGNO (sse_regno)),
3299 GEN_INT (i*8));
3300 sse_regno++;
3301 break;
3302 case X86_64_SSEDF_CLASS:
3303 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3304 gen_rtx_REG (DFmode,
3305 SSE_REGNO (sse_regno)),
3306 GEN_INT (i*8));
3307 sse_regno++;
3308 break;
3309 case X86_64_SSE_CLASS:
3310 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3311 tmpmode = TImode;
3312 else
3313 tmpmode = DImode;
3314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3315 gen_rtx_REG (tmpmode,
3316 SSE_REGNO (sse_regno)),
3317 GEN_INT (i*8));
3318 if (tmpmode == TImode)
3319 i++;
3320 sse_regno++;
3321 break;
3322 default:
3323 gcc_unreachable ();
3327 /* Empty aligned struct, union or class. */
3328 if (nexps == 0)
3329 return NULL;
3331 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3332 for (i = 0; i < nexps; i++)
3333 XVECEXP (ret, 0, i) = exp [i];
3334 return ret;
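/* Illustrative sketch (not part of the original source): for an argument of
   type struct { long a; double b; } with all registers still available, the
   PARALLEL built above looks roughly like

     (parallel [(expr_list (reg:DI di)   (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. one expr_list per eightbyte, pairing the chosen register with the
   byte offset of that eightbyte within the argument.  */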
3337 /* Update the data in CUM to advance over an argument
3338 of mode MODE and data type TYPE.
3339 (TYPE is null for libcalls where that information may not be available.) */
3341 void
3342 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3343 tree type, int named)
3345 int bytes =
3346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3347 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3349 if (type)
3350 mode = type_natural_mode (type);
3352 if (TARGET_DEBUG_ARG)
3353 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3354 "mode=%s, named=%d)\n\n",
3355 words, cum->words, cum->nregs, cum->sse_nregs,
3356 GET_MODE_NAME (mode), named);
3358 if (TARGET_64BIT)
3360 int int_nregs, sse_nregs;
3361 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3362 cum->words += words;
3363 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3365 cum->nregs -= int_nregs;
3366 cum->sse_nregs -= sse_nregs;
3367 cum->regno += int_nregs;
3368 cum->sse_regno += sse_nregs;
3370 else
3371 cum->words += words;
3373 else
3375 switch (mode)
3377 default:
3378 break;
3380 case BLKmode:
3381 if (bytes < 0)
3382 break;
3383 /* FALLTHRU */
3385 case DImode:
3386 case SImode:
3387 case HImode:
3388 case QImode:
3389 cum->words += words;
3390 cum->nregs -= words;
3391 cum->regno += words;
3393 if (cum->nregs <= 0)
3395 cum->nregs = 0;
3396 cum->regno = 0;
3398 break;
3400 case DFmode:
3401 if (cum->float_in_sse < 2)
3402 break;
3403 case SFmode:
3404 if (cum->float_in_sse < 1)
3405 break;
3406 /* FALLTHRU */
3408 case TImode:
3409 case V16QImode:
3410 case V8HImode:
3411 case V4SImode:
3412 case V2DImode:
3413 case V4SFmode:
3414 case V2DFmode:
3415 if (!type || !AGGREGATE_TYPE_P (type))
3417 cum->sse_words += words;
3418 cum->sse_nregs -= 1;
3419 cum->sse_regno += 1;
3420 if (cum->sse_nregs <= 0)
3422 cum->sse_nregs = 0;
3423 cum->sse_regno = 0;
3426 break;
3428 case V8QImode:
3429 case V4HImode:
3430 case V2SImode:
3431 case V2SFmode:
3432 if (!type || !AGGREGATE_TYPE_P (type))
3434 cum->mmx_words += words;
3435 cum->mmx_nregs -= 1;
3436 cum->mmx_regno += 1;
3437 if (cum->mmx_nregs <= 0)
3439 cum->mmx_nregs = 0;
3440 cum->mmx_regno = 0;
3443 break;
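/* Illustrative sketch (not part of the original source): advancing over the
   arguments of  void f (int i, double d)  on x86-64 consumes one integer
   and one SSE register: cum->nregs and cum->sse_nregs each drop by one while
   cum->regno and cum->sse_regno advance by one.  If an argument no longer
   fits into the remaining registers, only cum->words grows.  */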
3448 /* Define where to put the arguments to a function.
3449 Value is zero to push the argument on the stack,
3450 or a hard register in which to store the argument.
3452 MODE is the argument's machine mode.
3453 TYPE is the data type of the argument (as a tree).
3454 This is null for libcalls where that information may
3455 not be available.
3456 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3457 the preceding args and about the function being called.
3458 NAMED is nonzero if this argument is a named parameter
3459 (otherwise it is an extra parameter matching an ellipsis). */
3462 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3463 tree type, int named)
3465 enum machine_mode mode = orig_mode;
3466 rtx ret = NULL_RTX;
3467 int bytes =
3468 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3469 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3470 static bool warnedsse, warnedmmx;
3472 /* To simplify the code below, represent vector types with a vector mode
3473 even if MMX/SSE are not active. */
3474 if (type && TREE_CODE (type) == VECTOR_TYPE)
3475 mode = type_natural_mode (type);
3477 /* Handle a hidden AL argument containing the number of registers for
3478 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
3479 avoid any AL settings. */
3480 if (mode == VOIDmode)
3482 if (TARGET_64BIT)
3483 return GEN_INT (cum->maybe_vaarg
3484 ? (cum->sse_nregs < 0
3485 ? SSE_REGPARM_MAX
3486 : cum->sse_regno)
3487 : -1);
3488 else
3489 return constm1_rtx;
3491 if (TARGET_64BIT)
3492 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3493 cum->sse_nregs,
3494 &x86_64_int_parameter_registers [cum->regno],
3495 cum->sse_regno);
3496 else
3497 switch (mode)
3499 /* For now, pass fp/complex values on the stack. */
3500 default:
3501 break;
3503 case BLKmode:
3504 if (bytes < 0)
3505 break;
3506 /* FALLTHRU */
3507 case DImode:
3508 case SImode:
3509 case HImode:
3510 case QImode:
3511 if (words <= cum->nregs)
3513 int regno = cum->regno;
3515 /* Fastcall allocates the first two DWORD (SImode) or
3516 smaller arguments to ECX and EDX. */
3517 if (cum->fastcall)
3519 if (mode == BLKmode || mode == DImode)
3520 break;
3522 /* ECX not EAX is the first allocated register. */
3523 if (regno == 0)
3524 regno = 2;
3526 ret = gen_rtx_REG (mode, regno);
3528 break;
3529 case DFmode:
3530 if (cum->float_in_sse < 2)
3531 break;
3532 case SFmode:
3533 if (cum->float_in_sse < 1)
3534 break;
3535 /* FALLTHRU */
3536 case TImode:
3537 case V16QImode:
3538 case V8HImode:
3539 case V4SImode:
3540 case V2DImode:
3541 case V4SFmode:
3542 case V2DFmode:
3543 if (!type || !AGGREGATE_TYPE_P (type))
3545 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3547 warnedsse = true;
3548 warning (0, "SSE vector argument without SSE enabled "
3549 "changes the ABI");
3551 if (cum->sse_nregs)
3552 ret = gen_reg_or_parallel (mode, orig_mode,
3553 cum->sse_regno + FIRST_SSE_REG);
3555 break;
3556 case V8QImode:
3557 case V4HImode:
3558 case V2SImode:
3559 case V2SFmode:
3560 if (!type || !AGGREGATE_TYPE_P (type))
3562 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3564 warnedmmx = true;
3565 warning (0, "MMX vector argument without MMX enabled "
3566 "changes the ABI");
3568 if (cum->mmx_nregs)
3569 ret = gen_reg_or_parallel (mode, orig_mode,
3570 cum->mmx_regno + FIRST_MMX_REG);
3572 break;
3575 if (TARGET_DEBUG_ARG)
3577 fprintf (stderr,
3578 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3579 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3581 if (ret)
3582 print_simple_rtl (stderr, ret);
3583 else
3584 fprintf (stderr, ", stack");
3586 fprintf (stderr, " )\n");
3589 return ret;
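/* Illustrative sketch (not part of the original source): for the first named
   'int' argument of an x86-64 function this returns (reg:SI di); once the
   available integer registers are used up, construct_container returns NULL
   and the argument is passed on the stack.  For an ia32 fastcall function
   the first SImode argument lands in %ecx (regno 2), as handled above.  */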
3592 /* A C expression that indicates when an argument must be passed by
3593 reference. If nonzero for an argument, a copy of that argument is
3594 made in memory and a pointer to the argument is passed instead of
3595 the argument itself. The pointer is passed in whatever way is
3596 appropriate for passing a pointer to that type. */
3598 static bool
3599 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3600 enum machine_mode mode ATTRIBUTE_UNUSED,
3601 tree type, bool named ATTRIBUTE_UNUSED)
3603 if (!TARGET_64BIT)
3604 return 0;
3606 if (type && int_size_in_bytes (type) == -1)
3608 if (TARGET_DEBUG_ARG)
3609 fprintf (stderr, "function_arg_pass_by_reference\n");
3610 return 1;
3613 return 0;
3616 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3617 ABI. Only called if TARGET_SSE. */
3618 static bool
3619 contains_128bit_aligned_vector_p (tree type)
3621 enum machine_mode mode = TYPE_MODE (type);
3622 if (SSE_REG_MODE_P (mode)
3623 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3624 return true;
3625 if (TYPE_ALIGN (type) < 128)
3626 return false;
3628 if (AGGREGATE_TYPE_P (type))
3630 /* Walk the aggregates recursively. */
3631 switch (TREE_CODE (type))
3633 case RECORD_TYPE:
3634 case UNION_TYPE:
3635 case QUAL_UNION_TYPE:
3637 tree field;
3639 if (TYPE_BINFO (type))
3641 tree binfo, base_binfo;
3642 int i;
3644 for (binfo = TYPE_BINFO (type), i = 0;
3645 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3646 if (contains_128bit_aligned_vector_p
3647 (BINFO_TYPE (base_binfo)))
3648 return true;
3650 /* And now merge the fields of structure. */
3651 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3653 if (TREE_CODE (field) == FIELD_DECL
3654 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3655 return true;
3657 break;
3660 case ARRAY_TYPE:
3661 /* Just for use if some language passes arrays by value. */
3662 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3663 return true;
3664 break;
3666 default:
3667 gcc_unreachable ();
3670 return false;
3673 /* Gives the alignment boundary, in bits, of an argument with the
3674 specified mode and type. */
3677 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3679 int align;
3680 if (type)
3681 align = TYPE_ALIGN (type);
3682 else
3683 align = GET_MODE_ALIGNMENT (mode);
3684 if (align < PARM_BOUNDARY)
3685 align = PARM_BOUNDARY;
3686 if (!TARGET_64BIT)
3688 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3689 make an exception for SSE modes since these require 128bit
3690 alignment.
3692 The handling here differs from field_alignment. ICC aligns MMX
3693 arguments to 4 byte boundaries, while structure fields are aligned
3694 to 8 byte boundaries. */
3695 if (!TARGET_SSE)
3696 align = PARM_BOUNDARY;
3697 else if (!type)
3699 if (!SSE_REG_MODE_P (mode))
3700 align = PARM_BOUNDARY;
3702 else
3704 if (!contains_128bit_aligned_vector_p (type))
3705 align = PARM_BOUNDARY;
3708 if (align > 128)
3709 align = 128;
3710 return align;
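/* Illustrative sketch (not part of the original source): on ia32 with SSE
   enabled, an __m128 argument (V4SFmode), or a struct containing one, keeps
   its 128-bit alignment via contains_128bit_aligned_vector_p, while a plain
   int or double falls back to PARM_BOUNDARY.  The result is capped at 128
   bits in all cases.  */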
3713 /* Return true if N is a possible register number of function value. */
3714 bool
3715 ix86_function_value_regno_p (int regno)
3717 if (regno == 0
3718 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3719 || (regno == FIRST_SSE_REG && TARGET_SSE))
3720 return true;
3722 if (!TARGET_64BIT
3723 && (regno == FIRST_MMX_REG && TARGET_MMX))
3724 return true;
3726 return false;
3729 /* Define how to find the value returned by a function.
3730 VALTYPE is the data type of the value (as a tree).
3731 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3732 otherwise, FUNC is 0. */
3734 ix86_function_value (tree valtype, tree fntype_or_decl,
3735 bool outgoing ATTRIBUTE_UNUSED)
3737 enum machine_mode natmode = type_natural_mode (valtype);
3739 if (TARGET_64BIT)
3741 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3742 1, REGPARM_MAX, SSE_REGPARM_MAX,
3743 x86_64_int_return_registers, 0);
3744 /* For zero-sized structures, construct_container returns NULL, but we
3745 need to keep the rest of the compiler happy by returning a meaningful value. */
3746 if (!ret)
3747 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3748 return ret;
3750 else
3752 tree fn = NULL_TREE, fntype;
3753 if (fntype_or_decl
3754 && DECL_P (fntype_or_decl))
3755 fn = fntype_or_decl;
3756 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3757 return gen_rtx_REG (TYPE_MODE (valtype),
3758 ix86_value_regno (natmode, fn, fntype));
3762 /* Return true iff type is returned in memory. */
3764 ix86_return_in_memory (tree type)
3766 int needed_intregs, needed_sseregs, size;
3767 enum machine_mode mode = type_natural_mode (type);
3769 if (TARGET_64BIT)
3770 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3772 if (mode == BLKmode)
3773 return 1;
3775 size = int_size_in_bytes (type);
3777 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3778 return 0;
3780 if (VECTOR_MODE_P (mode) || mode == TImode)
3782 /* User-created vectors small enough to fit in EAX. */
3783 if (size < 8)
3784 return 0;
3786 /* MMX/3dNow values are returned in MM0,
3787 except when it doesn't exist. */
3788 if (size == 8)
3789 return (TARGET_MMX ? 0 : 1);
3791 /* SSE values are returned in XMM0, except when it doesn't exist. */
3792 if (size == 16)
3793 return (TARGET_SSE ? 0 : 1);
3796 if (mode == XFmode)
3797 return 0;
3799 if (size > 12)
3800 return 1;
3801 return 0;
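/* Illustrative sketch (not part of the original source): on ia32, an 8-byte
   __m64 value is returned in a register only when MMX is enabled and a
   16-byte __m128 only when SSE is enabled (otherwise both go to memory);
   XFmode long double is returned in a register, and BLKmode aggregates or
   anything wider than 12 bytes go to memory.  On x86-64 the decision simply
   follows examine_argument above.  */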
3804 /* When returning SSE vector types, we have a choice of either
3805 (1) being abi incompatible with a -march switch, or
3806 (2) generating an error.
3807 Given no good solution, I think the safest thing is one warning.
3808 The user won't be able to use -Werror, but....
3810 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3811 called in response to actually generating a caller or callee that
3812 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3813 via aggregate_value_p for general type probing from tree-ssa. */
3815 static rtx
3816 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3818 static bool warnedsse, warnedmmx;
3820 if (type)
3822 /* Look at the return type of the function, not the function type. */
3823 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3825 if (!TARGET_SSE && !warnedsse)
3827 if (mode == TImode
3828 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3830 warnedsse = true;
3831 warning (0, "SSE vector return without SSE enabled "
3832 "changes the ABI");
3836 if (!TARGET_MMX && !warnedmmx)
3838 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3840 warnedmmx = true;
3841 warning (0, "MMX vector return without MMX enabled "
3842 "changes the ABI");
3847 return NULL;
3850 /* Define how to find the value returned by a library function
3851 assuming the value has mode MODE. */
3853 ix86_libcall_value (enum machine_mode mode)
3855 if (TARGET_64BIT)
3857 switch (mode)
3859 case SFmode:
3860 case SCmode:
3861 case DFmode:
3862 case DCmode:
3863 case TFmode:
3864 return gen_rtx_REG (mode, FIRST_SSE_REG);
3865 case XFmode:
3866 case XCmode:
3867 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3868 case TCmode:
3869 return NULL;
3870 default:
3871 return gen_rtx_REG (mode, 0);
3874 else
3875 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3878 /* Given a mode, return the register to use for a return value. */
3880 static int
3881 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3883 gcc_assert (!TARGET_64BIT);
3885 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3886 we prevent this case when mmx is not available. */
3887 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3888 return FIRST_MMX_REG;
3890 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3891 we prevent this case when sse is not available. */
3892 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3893 return FIRST_SSE_REG;
3895 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3896 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3897 return 0;
3899 /* Floating point return values in %st(0), except for local functions when
3900 SSE math is enabled or for functions with sseregparm attribute. */
3901 if ((func || fntype)
3902 && (mode == SFmode || mode == DFmode))
3904 int sse_level = ix86_function_sseregparm (fntype, func);
3905 if ((sse_level >= 1 && mode == SFmode)
3906 || (sse_level == 2 && mode == DFmode))
3907 return FIRST_SSE_REG;
3910 return FIRST_FLOAT_REG;
3913 /* Create the va_list data type. */
3915 static tree
3916 ix86_build_builtin_va_list (void)
3918 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3920 /* For i386 we use plain pointer to argument area. */
3921 if (!TARGET_64BIT)
3922 return build_pointer_type (char_type_node);
3924 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3925 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3927 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3928 unsigned_type_node);
3929 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3930 unsigned_type_node);
3931 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3932 ptr_type_node);
3933 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3934 ptr_type_node);
3936 va_list_gpr_counter_field = f_gpr;
3937 va_list_fpr_counter_field = f_fpr;
3939 DECL_FIELD_CONTEXT (f_gpr) = record;
3940 DECL_FIELD_CONTEXT (f_fpr) = record;
3941 DECL_FIELD_CONTEXT (f_ovf) = record;
3942 DECL_FIELD_CONTEXT (f_sav) = record;
3944 TREE_CHAIN (record) = type_decl;
3945 TYPE_NAME (record) = type_decl;
3946 TYPE_FIELDS (record) = f_gpr;
3947 TREE_CHAIN (f_gpr) = f_fpr;
3948 TREE_CHAIN (f_fpr) = f_ovf;
3949 TREE_CHAIN (f_ovf) = f_sav;
3951 layout_type (record);
3953 /* The correct type is an array type of one element. */
3954 return build_array_type (record, build_index_type (size_zero_node));
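/* Illustrative sketch (not part of the original source): the record built
   above corresponds roughly to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   i.e. the one-element array of a four-field record used by the x86-64
   psABI for va_list.  */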
3957 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3959 static void
3960 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3961 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3962 int no_rtl)
3964 CUMULATIVE_ARGS next_cum;
3965 rtx save_area = NULL_RTX, mem;
3966 rtx label;
3967 rtx label_ref;
3968 rtx tmp_reg;
3969 rtx nsse_reg;
3970 int set;
3971 tree fntype;
3972 int stdarg_p;
3973 int i;
3975 if (!TARGET_64BIT)
3976 return;
3978 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3979 return;
3981 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3982 ix86_save_varrargs_registers = 1;
3984 cfun->stack_alignment_needed = 128;
3986 fntype = TREE_TYPE (current_function_decl);
3987 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3988 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3989 != void_type_node));
3991 /* For varargs, we do not want to skip the dummy va_dcl argument.
3992 For stdargs, we do want to skip the last named argument. */
3993 next_cum = *cum;
3994 if (stdarg_p)
3995 function_arg_advance (&next_cum, mode, type, 1);
3997 if (!no_rtl)
3998 save_area = frame_pointer_rtx;
4000 set = get_varargs_alias_set ();
4002 for (i = next_cum.regno;
4003 i < ix86_regparm
4004 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4005 i++)
4007 mem = gen_rtx_MEM (Pmode,
4008 plus_constant (save_area, i * UNITS_PER_WORD));
4009 MEM_NOTRAP_P (mem) = 1;
4010 set_mem_alias_set (mem, set);
4011 emit_move_insn (mem, gen_rtx_REG (Pmode,
4012 x86_64_int_parameter_registers[i]));
4015 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4017 /* Now emit code to save SSE registers. The AX parameter contains the number
4018 of SSE parameter registers used to call this function. We use the
4019 sse_prologue_save insn template, which produces a computed jump across
4020 the SSE saves. We need some preparation work to get this working. */
4022 label = gen_label_rtx ();
4023 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4025 /* Compute the address to jump to:
4026 label - eax*4 + nnamed_sse_arguments*4 (matching the multipliers used below). */
4027 tmp_reg = gen_reg_rtx (Pmode);
4028 nsse_reg = gen_reg_rtx (Pmode);
4029 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4030 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4031 gen_rtx_MULT (Pmode, nsse_reg,
4032 GEN_INT (4))));
4033 if (next_cum.sse_regno)
4034 emit_move_insn
4035 (nsse_reg,
4036 gen_rtx_CONST (DImode,
4037 gen_rtx_PLUS (DImode,
4038 label_ref,
4039 GEN_INT (next_cum.sse_regno * 4))));
4040 else
4041 emit_move_insn (nsse_reg, label_ref);
4042 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4044 /* Compute the address of the memory block we save into. We always use a
4045 pointer pointing 127 bytes after the first byte to store - this is needed
4046 to keep the instruction size limited to 4 bytes. */
4047 tmp_reg = gen_reg_rtx (Pmode);
4048 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4049 plus_constant (save_area,
4050 8 * REGPARM_MAX + 127)));
4051 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4052 MEM_NOTRAP_P (mem) = 1;
4053 set_mem_alias_set (mem, set);
4054 set_mem_align (mem, BITS_PER_WORD);
4056 /* And finally do the dirty job! */
4057 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4058 GEN_INT (next_cum.sse_regno), label));
4063 /* Implement va_start. */
4065 void
4066 ix86_va_start (tree valist, rtx nextarg)
4068 HOST_WIDE_INT words, n_gpr, n_fpr;
4069 tree f_gpr, f_fpr, f_ovf, f_sav;
4070 tree gpr, fpr, ovf, sav, t;
4072 /* Only 64bit target needs something special. */
4073 if (!TARGET_64BIT)
4075 std_expand_builtin_va_start (valist, nextarg);
4076 return;
4079 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4080 f_fpr = TREE_CHAIN (f_gpr);
4081 f_ovf = TREE_CHAIN (f_fpr);
4082 f_sav = TREE_CHAIN (f_ovf);
4084 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4085 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4086 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4087 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4088 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4090 /* Count number of gp and fp argument registers used. */
4091 words = current_function_args_info.words;
4092 n_gpr = current_function_args_info.regno;
4093 n_fpr = current_function_args_info.sse_regno;
4095 if (TARGET_DEBUG_ARG)
4096 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4097 (int) words, (int) n_gpr, (int) n_fpr);
4099 if (cfun->va_list_gpr_size)
4101 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
4102 build_int_cst (NULL_TREE, n_gpr * 8));
4103 TREE_SIDE_EFFECTS (t) = 1;
4104 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4107 if (cfun->va_list_fpr_size)
4109 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
4110 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
4111 TREE_SIDE_EFFECTS (t) = 1;
4112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4115 /* Find the overflow area. */
4116 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
4117 if (words != 0)
4118 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
4119 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
4120 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4121 TREE_SIDE_EFFECTS (t) = 1;
4122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4124 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4126 /* Find the register save area.
4127 The function's prologue saves it right above the stack frame. */
4128 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
4129 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
4130 TREE_SIDE_EFFECTS (t) = 1;
4131 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
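/* Illustrative sketch (not part of the original source): for
   void f (int i, double d, ...)  the code above leaves gp_offset = 1*8 = 8
   and fp_offset = 1*16 + 8*REGPARM_MAX (64 if REGPARM_MAX is 6), i.e. the
   offsets into the register save area of the first GP and SSE register not
   consumed by named arguments.  */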
4135 /* Implement va_arg. */
4137 tree
4138 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4140 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4141 tree f_gpr, f_fpr, f_ovf, f_sav;
4142 tree gpr, fpr, ovf, sav, t;
4143 int size, rsize;
4144 tree lab_false, lab_over = NULL_TREE;
4145 tree addr, t2;
4146 rtx container;
4147 int indirect_p = 0;
4148 tree ptrtype;
4149 enum machine_mode nat_mode;
4151 /* Only 64bit target needs something special. */
4152 if (!TARGET_64BIT)
4153 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4155 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4156 f_fpr = TREE_CHAIN (f_gpr);
4157 f_ovf = TREE_CHAIN (f_fpr);
4158 f_sav = TREE_CHAIN (f_ovf);
4160 valist = build_va_arg_indirect_ref (valist);
4161 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4162 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4163 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4164 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4166 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4167 if (indirect_p)
4168 type = build_pointer_type (type);
4169 size = int_size_in_bytes (type);
4170 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4172 nat_mode = type_natural_mode (type);
4173 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4174 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4176 /* Pull the value out of the saved registers. */
4178 addr = create_tmp_var (ptr_type_node, "addr");
4179 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4181 if (container)
4183 int needed_intregs, needed_sseregs;
4184 bool need_temp;
4185 tree int_addr, sse_addr;
4187 lab_false = create_artificial_label ();
4188 lab_over = create_artificial_label ();
4190 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4192 need_temp = (!REG_P (container)
4193 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4194 || TYPE_ALIGN (type) > 128));
4196 /* In case we are passing a structure, verify that it is a consecutive block
4197 in the register save area. If not, we need to do moves. */
4198 if (!need_temp && !REG_P (container))
4200 /* Verify that all registers are strictly consecutive */
4201 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4203 int i;
4205 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4207 rtx slot = XVECEXP (container, 0, i);
4208 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4209 || INTVAL (XEXP (slot, 1)) != i * 16)
4210 need_temp = 1;
4213 else
4215 int i;
4217 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4219 rtx slot = XVECEXP (container, 0, i);
4220 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4221 || INTVAL (XEXP (slot, 1)) != i * 8)
4222 need_temp = 1;
4226 if (!need_temp)
4228 int_addr = addr;
4229 sse_addr = addr;
4231 else
4233 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4234 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4235 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4236 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4239 /* First ensure that we fit completely in registers. */
4240 if (needed_intregs)
4242 t = build_int_cst (TREE_TYPE (gpr),
4243 (REGPARM_MAX - needed_intregs + 1) * 8);
4244 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4245 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4246 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4247 gimplify_and_add (t, pre_p);
4249 if (needed_sseregs)
4251 t = build_int_cst (TREE_TYPE (fpr),
4252 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4253 + REGPARM_MAX * 8);
4254 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4255 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4256 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4257 gimplify_and_add (t, pre_p);
4260 /* Compute index to start of area used for integer regs. */
4261 if (needed_intregs)
4263 /* int_addr = gpr + sav; */
4264 t = fold_convert (ptr_type_node, gpr);
4265 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4266 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4267 gimplify_and_add (t, pre_p);
4269 if (needed_sseregs)
4271 /* sse_addr = fpr + sav; */
4272 t = fold_convert (ptr_type_node, fpr);
4273 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4274 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4275 gimplify_and_add (t, pre_p);
4277 if (need_temp)
4279 int i;
4280 tree temp = create_tmp_var (type, "va_arg_tmp");
4282 /* addr = &temp; */
4283 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4284 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4285 gimplify_and_add (t, pre_p);
4287 for (i = 0; i < XVECLEN (container, 0); i++)
4289 rtx slot = XVECEXP (container, 0, i);
4290 rtx reg = XEXP (slot, 0);
4291 enum machine_mode mode = GET_MODE (reg);
4292 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4293 tree addr_type = build_pointer_type (piece_type);
4294 tree src_addr, src;
4295 int src_offset;
4296 tree dest_addr, dest;
4298 if (SSE_REGNO_P (REGNO (reg)))
4300 src_addr = sse_addr;
4301 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4303 else
4305 src_addr = int_addr;
4306 src_offset = REGNO (reg) * 8;
4308 src_addr = fold_convert (addr_type, src_addr);
4309 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4310 size_int (src_offset)));
4311 src = build_va_arg_indirect_ref (src_addr);
4313 dest_addr = fold_convert (addr_type, addr);
4314 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4315 size_int (INTVAL (XEXP (slot, 1)))));
4316 dest = build_va_arg_indirect_ref (dest_addr);
4318 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4319 gimplify_and_add (t, pre_p);
4323 if (needed_intregs)
4325 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4326 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4327 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4328 gimplify_and_add (t, pre_p);
4330 if (needed_sseregs)
4332 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4333 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4334 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4335 gimplify_and_add (t, pre_p);
4338 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4339 gimplify_and_add (t, pre_p);
4341 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4342 append_to_statement_list (t, pre_p);
4345 /* ... otherwise out of the overflow area. */
4347 /* Care for on-stack alignment if needed. */
4348 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4349 || integer_zerop (TYPE_SIZE (type)))
4350 t = ovf;
4351 else
4353 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4354 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4355 build_int_cst (TREE_TYPE (ovf), align - 1));
4356 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4357 build_int_cst (TREE_TYPE (t), -align));
4359 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4361 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4362 gimplify_and_add (t2, pre_p);
4364 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4365 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4366 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4367 gimplify_and_add (t, pre_p);
4369 if (container)
4371 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4372 append_to_statement_list (t, pre_p);
4375 ptrtype = build_pointer_type (type);
4376 addr = fold_convert (ptrtype, addr);
4378 if (indirect_p)
4379 addr = build_va_arg_indirect_ref (addr);
4380 return build_va_arg_indirect_ref (addr);
4383 /* Return nonzero if OPNUM's MEM should be matched
4384 in movabs* patterns. */
4387 ix86_check_movabs (rtx insn, int opnum)
4389 rtx set, mem;
4391 set = PATTERN (insn);
4392 if (GET_CODE (set) == PARALLEL)
4393 set = XVECEXP (set, 0, 0);
4394 gcc_assert (GET_CODE (set) == SET);
4395 mem = XEXP (set, opnum);
4396 while (GET_CODE (mem) == SUBREG)
4397 mem = SUBREG_REG (mem);
4398 gcc_assert (GET_CODE (mem) == MEM);
4399 return (volatile_ok || !MEM_VOLATILE_P (mem));
4402 /* Initialize the table of extra 80387 mathematical constants. */
4404 static void
4405 init_ext_80387_constants (void)
4407 static const char * cst[5] =
4409 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4410 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4411 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4412 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4413 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4415 int i;
4417 for (i = 0; i < 5; i++)
4419 real_from_string (&ext_80387_constants_table[i], cst[i]);
4420 /* Ensure each constant is rounded to XFmode precision. */
4421 real_convert (&ext_80387_constants_table[i],
4422 XFmode, &ext_80387_constants_table[i]);
4425 ext_80387_constants_init = 1;
4428 /* Return true if the constant is something that can be loaded with
4429 a special instruction. */
4432 standard_80387_constant_p (rtx x)
4434 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4435 return -1;
4437 if (x == CONST0_RTX (GET_MODE (x)))
4438 return 1;
4439 if (x == CONST1_RTX (GET_MODE (x)))
4440 return 2;
4442 /* For XFmode constants, try to find a special 80387 instruction when
4443 optimizing for size or on those CPUs that benefit from them. */
4444 if (GET_MODE (x) == XFmode
4445 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4447 REAL_VALUE_TYPE r;
4448 int i;
4450 if (! ext_80387_constants_init)
4451 init_ext_80387_constants ();
4453 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4454 for (i = 0; i < 5; i++)
4455 if (real_identical (&r, &ext_80387_constants_table[i]))
4456 return i + 3;
4459 return 0;
4462 /* Return the opcode of the special instruction to be used to load
4463 the constant X. */
4465 const char *
4466 standard_80387_constant_opcode (rtx x)
4468 switch (standard_80387_constant_p (x))
4470 case 1:
4471 return "fldz";
4472 case 2:
4473 return "fld1";
4474 case 3:
4475 return "fldlg2";
4476 case 4:
4477 return "fldln2";
4478 case 5:
4479 return "fldl2e";
4480 case 6:
4481 return "fldl2t";
4482 case 7:
4483 return "fldpi";
4484 default:
4485 gcc_unreachable ();
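/* Illustrative sketch (not part of the original source):
   standard_80387_constant_p (CONST0_RTX (XFmode)) is 1 and
   standard_80387_constant_p (CONST1_RTX (XFmode)) is 2, giving "fldz" and
   "fld1" above; an XFmode pi constant matches table entry 4 and yields 7,
   i.e. "fldpi", when the extended constant table is enabled (optimize_size
   or the relevant tuning bit).  */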
4489 /* Return the CONST_DOUBLE representing the 80387 constant that is
4490 loaded by the specified special instruction. The argument IDX
4491 matches the return value from standard_80387_constant_p. */
4494 standard_80387_constant_rtx (int idx)
4496 int i;
4498 if (! ext_80387_constants_init)
4499 init_ext_80387_constants ();
4501 switch (idx)
4503 case 3:
4504 case 4:
4505 case 5:
4506 case 6:
4507 case 7:
4508 i = idx - 3;
4509 break;
4511 default:
4512 gcc_unreachable ();
4515 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4516 XFmode);
4519 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4522 standard_sse_constant_p (rtx x)
4524 if (x == const0_rtx)
4525 return 1;
4526 return (x == CONST0_RTX (GET_MODE (x)));
4529 /* Returns 1 if OP contains a symbol reference */
4532 symbolic_reference_mentioned_p (rtx op)
4534 const char *fmt;
4535 int i;
4537 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4538 return 1;
4540 fmt = GET_RTX_FORMAT (GET_CODE (op));
4541 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4543 if (fmt[i] == 'E')
4545 int j;
4547 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4548 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4549 return 1;
4552 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4553 return 1;
4556 return 0;
4559 /* Return 1 if it is appropriate to emit `ret' instructions in the
4560 body of a function. Do this only if the epilogue is simple, needing a
4561 couple of insns. Prior to reloading, we can't tell how many registers
4562 must be saved, so return 0 then. Return 0 if there is no frame
4563 marker to de-allocate. */
4566 ix86_can_use_return_insn_p (void)
4568 struct ix86_frame frame;
4570 if (! reload_completed || frame_pointer_needed)
4571 return 0;
4573 /* Don't allow more than 32K of popped arguments, since that's all we can do
4574 with one instruction. */
4575 if (current_function_pops_args
4576 && current_function_args_size >= 32768)
4577 return 0;
4579 ix86_compute_frame_layout (&frame);
4580 return frame.to_allocate == 0 && frame.nregs == 0;
4583 /* Value should be nonzero if functions must have frame pointers.
4584 Zero means the frame pointer need not be set up (and parms may
4585 be accessed via the stack pointer) in functions that seem suitable. */
4588 ix86_frame_pointer_required (void)
4590 /* If we accessed previous frames, then the generated code expects
4591 to be able to access the saved ebp value in our frame. */
4592 if (cfun->machine->accesses_prev_frame)
4593 return 1;
4595 /* Several x86 OSes need a frame pointer for other reasons,
4596 usually pertaining to setjmp. */
4597 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4598 return 1;
4600 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4601 the frame pointer by default. Turn it back on now if we've not
4602 got a leaf function. */
4603 if (TARGET_OMIT_LEAF_FRAME_POINTER
4604 && (!current_function_is_leaf
4605 || ix86_current_function_calls_tls_descriptor))
4606 return 1;
4608 if (current_function_profile)
4609 return 1;
4611 return 0;
4614 /* Record that the current function accesses previous call frames. */
4616 void
4617 ix86_setup_frame_addresses (void)
4619 cfun->machine->accesses_prev_frame = 1;
4622 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4623 # define USE_HIDDEN_LINKONCE 1
4624 #else
4625 # define USE_HIDDEN_LINKONCE 0
4626 #endif
4628 static int pic_labels_used;
4630 /* Fills in the label name that should be used for a pc thunk for
4631 the given register. */
4633 static void
4634 get_pc_thunk_name (char name[32], unsigned int regno)
4636 if (USE_HIDDEN_LINKONCE)
4637 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4638 else
4639 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4643 /* This function generates the pc thunks used for -fpic: each thunk loads its
4644 register with the return address of the caller and then returns. */
4646 void
4647 ix86_file_end (void)
4649 rtx xops[2];
4650 int regno;
4652 for (regno = 0; regno < 8; ++regno)
4654 char name[32];
4656 if (! ((pic_labels_used >> regno) & 1))
4657 continue;
4659 get_pc_thunk_name (name, regno);
4661 #if TARGET_MACHO
4662 if (TARGET_MACHO)
4664 switch_to_section (darwin_sections[text_coal_section]);
4665 fputs ("\t.weak_definition\t", asm_out_file);
4666 assemble_name (asm_out_file, name);
4667 fputs ("\n\t.private_extern\t", asm_out_file);
4668 assemble_name (asm_out_file, name);
4669 fputs ("\n", asm_out_file);
4670 ASM_OUTPUT_LABEL (asm_out_file, name);
4672 else
4673 #endif
4674 if (USE_HIDDEN_LINKONCE)
4676 tree decl;
4678 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4679 error_mark_node);
4680 TREE_PUBLIC (decl) = 1;
4681 TREE_STATIC (decl) = 1;
4682 DECL_ONE_ONLY (decl) = 1;
4684 (*targetm.asm_out.unique_section) (decl, 0);
4685 switch_to_section (get_named_section (decl, NULL, 0));
4687 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4688 fputs ("\t.hidden\t", asm_out_file);
4689 assemble_name (asm_out_file, name);
4690 fputc ('\n', asm_out_file);
4691 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4693 else
4695 switch_to_section (text_section);
4696 ASM_OUTPUT_LABEL (asm_out_file, name);
4699 xops[0] = gen_rtx_REG (SImode, regno);
4700 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4701 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4702 output_asm_insn ("ret", xops);
4705 if (NEED_INDICATE_EXEC_STACK)
4706 file_end_indicate_exec_stack ();
4709 /* Emit code for the SET_GOT patterns. */
4711 const char *
4712 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4714 rtx xops[3];
4716 xops[0] = dest;
4717 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4719 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4721 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4723 if (!flag_pic)
4724 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4725 else
4726 output_asm_insn ("call\t%a2", xops);
4728 #if TARGET_MACHO
4729 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4730 is what will be referenced by the Mach-O PIC subsystem. */
4731 if (!label)
4732 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4733 #endif
4735 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4736 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4738 if (flag_pic)
4739 output_asm_insn ("pop{l}\t%0", xops);
4741 else
4743 char name[32];
4744 get_pc_thunk_name (name, REGNO (dest));
4745 pic_labels_used |= 1 << REGNO (dest);
4747 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4748 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4749 output_asm_insn ("call\t%X2", xops);
4750 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4751 is what will be referenced by the Mach-O PIC subsystem. */
4752 #if TARGET_MACHO
4753 if (!label)
4754 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4755 else
4756 targetm.asm_out.internal_label (asm_out_file, "L",
4757 CODE_LABEL_NUMBER (label));
4758 #endif
4761 if (TARGET_MACHO)
4762 return "";
4764 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4765 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4766 else
4767 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4769 return "";
4772 /* Generate a "push" pattern for input ARG. */
4774 static rtx
4775 gen_push (rtx arg)
4777 return gen_rtx_SET (VOIDmode,
4778 gen_rtx_MEM (Pmode,
4779 gen_rtx_PRE_DEC (Pmode,
4780 stack_pointer_rtx)),
4781 arg);
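/* Illustrative sketch (not part of the original source): on ia32, where
   Pmode is SImode, gen_push (hard_frame_pointer_rtx) builds

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   which the push insn patterns emit as a single "push %ebp".  */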
4784 /* Return >= 0 if there is an unused call-clobbered register available
4785 for the entire function. */
4787 static unsigned int
4788 ix86_select_alt_pic_regnum (void)
4790 if (current_function_is_leaf && !current_function_profile
4791 && !ix86_current_function_calls_tls_descriptor)
4793 int i;
4794 for (i = 2; i >= 0; --i)
4795 if (!regs_ever_live[i])
4796 return i;
4799 return INVALID_REGNUM;
4802 /* Return 1 if we need to save REGNO. */
4803 static int
4804 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4806 if (pic_offset_table_rtx
4807 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4808 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4809 || current_function_profile
4810 || current_function_calls_eh_return
4811 || current_function_uses_const_pool))
4813 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4814 return 0;
4815 return 1;
4818 if (current_function_calls_eh_return && maybe_eh_return)
4820 unsigned i;
4821 for (i = 0; ; i++)
4823 unsigned test = EH_RETURN_DATA_REGNO (i);
4824 if (test == INVALID_REGNUM)
4825 break;
4826 if (test == regno)
4827 return 1;
4831 if (cfun->machine->force_align_arg_pointer
4832 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4833 return 1;
4835 return (regs_ever_live[regno]
4836 && !call_used_regs[regno]
4837 && !fixed_regs[regno]
4838 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4841 /* Return number of registers to be saved on the stack. */
4843 static int
4844 ix86_nsaved_regs (void)
4846 int nregs = 0;
4847 int regno;
4849 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4850 if (ix86_save_reg (regno, true))
4851 nregs++;
4852 return nregs;
4855 /* Return the offset between two registers, one to be eliminated, and the other
4856 its replacement, at the start of a routine. */
4858 HOST_WIDE_INT
4859 ix86_initial_elimination_offset (int from, int to)
4861 struct ix86_frame frame;
4862 ix86_compute_frame_layout (&frame);
4864 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4865 return frame.hard_frame_pointer_offset;
4866 else if (from == FRAME_POINTER_REGNUM
4867 && to == HARD_FRAME_POINTER_REGNUM)
4868 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4869 else
4871 gcc_assert (to == STACK_POINTER_REGNUM);
4873 if (from == ARG_POINTER_REGNUM)
4874 return frame.stack_pointer_offset;
4876 gcc_assert (from == FRAME_POINTER_REGNUM);
4877 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4881 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4883 static void
4884 ix86_compute_frame_layout (struct ix86_frame *frame)
4886 HOST_WIDE_INT total_size;
4887 unsigned int stack_alignment_needed;
4888 HOST_WIDE_INT offset;
4889 unsigned int preferred_alignment;
4890 HOST_WIDE_INT size = get_frame_size ();
4892 frame->nregs = ix86_nsaved_regs ();
4893 total_size = size;
4895 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4896 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4898 /* During reload iteration the number of registers saved can change.
4899 Recompute the value as needed. Do not recompute when the number of registers
4900 didn't change, as reload does multiple calls to the function and does not
4901 expect the decision to change within a single iteration. */
4902 if (!optimize_size
4903 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4905 int count = frame->nregs;
4907 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4908 /* The fast prologue uses move instead of push to save registers. This
4909 is significantly longer, but also executes faster as modern hardware
4910 can execute the moves in parallel, but can't do that for push/pop.
4912 Be careful about choosing which prologue to emit: when the function takes
4913 many instructions to execute anyway, we may as well use the slow version,
4914 and likewise when the function is known to be outside a hot spot (this is
4915 known with feedback only). Weight the size of the function by the number of
4916 registers to save, as it is cheap to use one or two push instructions but
4917 very slow to use many of them. */
4918 if (count)
4919 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4920 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4921 || (flag_branch_probabilities
4922 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4923 cfun->machine->use_fast_prologue_epilogue = false;
4924 else
4925 cfun->machine->use_fast_prologue_epilogue
4926 = !expensive_function_p (count);
4928 if (TARGET_PROLOGUE_USING_MOVE
4929 && cfun->machine->use_fast_prologue_epilogue)
4930 frame->save_regs_using_mov = true;
4931 else
4932 frame->save_regs_using_mov = false;
4935 /* Skip return address and saved base pointer. */
4936 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4938 frame->hard_frame_pointer_offset = offset;
4940 /* Do some sanity checking of stack_alignment_needed and
4941 preferred_alignment, since the i386 port is the only one using these
4942 features, which may break easily. */
4944 gcc_assert (!size || stack_alignment_needed);
4945 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4946 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4947 gcc_assert (stack_alignment_needed
4948 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4950 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4951 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4953 /* Register save area */
4954 offset += frame->nregs * UNITS_PER_WORD;
4956 /* Va-arg area */
4957 if (ix86_save_varrargs_registers)
4959 offset += X86_64_VARARGS_SIZE;
4960 frame->va_arg_size = X86_64_VARARGS_SIZE;
4962 else
4963 frame->va_arg_size = 0;
4965 /* Align start of frame for local function. */
4966 frame->padding1 = ((offset + stack_alignment_needed - 1)
4967 & -stack_alignment_needed) - offset;
4969 offset += frame->padding1;
4971 /* Frame pointer points here. */
4972 frame->frame_pointer_offset = offset;
4974 offset += size;
4976 /* Add outgoing arguments area. Can be skipped if we eliminated
4977 all the function calls as dead code.
4978 Skipping is however impossible when the function calls alloca: the alloca
4979 expander assumes that the last current_function_outgoing_args_size bytes
4980 of the stack frame are unused. */
4981 if (ACCUMULATE_OUTGOING_ARGS
4982 && (!current_function_is_leaf || current_function_calls_alloca
4983 || ix86_current_function_calls_tls_descriptor))
4985 offset += current_function_outgoing_args_size;
4986 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4988 else
4989 frame->outgoing_arguments_size = 0;
4991 /* Align stack boundary. Only needed if we're calling another function
4992 or using alloca. */
4993 if (!current_function_is_leaf || current_function_calls_alloca
4994 || ix86_current_function_calls_tls_descriptor)
4995 frame->padding2 = ((offset + preferred_alignment - 1)
4996 & -preferred_alignment) - offset;
4997 else
4998 frame->padding2 = 0;
5000 offset += frame->padding2;
5002 /* We've reached end of stack frame. */
5003 frame->stack_pointer_offset = offset;
5005 /* Size prologue needs to allocate. */
5006 frame->to_allocate =
5007 (size + frame->padding1 + frame->padding2
5008 + frame->outgoing_arguments_size + frame->va_arg_size);
5010 if ((!frame->to_allocate && frame->nregs <= 1)
5011 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5012 frame->save_regs_using_mov = false;
5014 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5015 && current_function_is_leaf
5016 && !ix86_current_function_calls_tls_descriptor)
5018 frame->red_zone_size = frame->to_allocate;
5019 if (frame->save_regs_using_mov)
5020 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5021 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5022 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5024 else
5025 frame->red_zone_size = 0;
5026 frame->to_allocate -= frame->red_zone_size;
5027 frame->stack_pointer_offset -= frame->red_zone_size;
5028 #if 0
5029 fprintf (stderr, "nregs: %i\n", frame->nregs);
5030 fprintf (stderr, "size: %i\n", size);
5031 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5032 fprintf (stderr, "padding1: %i\n", frame->padding1);
5033 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5034 fprintf (stderr, "padding2: %i\n", frame->padding2);
5035 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5036 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5037 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5038 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5039 frame->hard_frame_pointer_offset);
5040 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5041 #endif
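/* Illustrative sketch (not part of the original source): from higher to
   lower addresses the computed layout is roughly

     return address
     saved frame pointer (if needed)    <- hard_frame_pointer_offset
     saved registers (nregs words)
     va-arg register save area
     padding1
     local variables                    <- frame_pointer_offset
     outgoing arguments
     padding2                           <- stack_pointer_offset

   with to_allocate covering everything below the register save area, minus
   any portion the red zone lets the prologue skip allocating.  */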
5044 /* Emit code to save registers in the prologue. */
5046 static void
5047 ix86_emit_save_regs (void)
5049 unsigned int regno;
5050 rtx insn;
5052 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5053 if (ix86_save_reg (regno, true))
5055 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5056 RTX_FRAME_RELATED_P (insn) = 1;
5060 /* Emit code to save registers using MOV insns. The first register
5061 is saved at POINTER + OFFSET. */
5062 static void
5063 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5065 unsigned int regno;
5066 rtx insn;
5068 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5069 if (ix86_save_reg (regno, true))
5071 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5072 Pmode, offset),
5073 gen_rtx_REG (Pmode, regno));
5074 RTX_FRAME_RELATED_P (insn) = 1;
5075 offset += UNITS_PER_WORD;
5079 /* Expand prologue or epilogue stack adjustment.
5080 The pattern exists to put a dependency on all ebp-based memory accesses.
5081 STYLE should be negative if instructions should be marked as frame related,
5082 zero if the %r11 register is live and cannot be freely used, and positive
5083 otherwise. */
5085 static void
5086 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5088 rtx insn;
5090 if (! TARGET_64BIT)
5091 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5092 else if (x86_64_immediate_operand (offset, DImode))
5093 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5094 else
5096 rtx r11;
5097 /* r11 is used by indirect sibcall return as well, set before the
5098 epilogue and used after the epilogue. ATM indirect sibcall
5099 shouldn't be used together with huge frame sizes in one
5100 function because of the frame_size check in sibcall.c. */
5101 gcc_assert (style);
5102 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5103 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5104 if (style < 0)
5105 RTX_FRAME_RELATED_P (insn) = 1;
5106 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5107 offset));
5109 if (style < 0)
5110 RTX_FRAME_RELATED_P (insn) = 1;
5113 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5115 static rtx
5116 ix86_internal_arg_pointer (void)
5118 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5119 && DECL_NAME (current_function_decl)
5120 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5121 && DECL_FILE_SCOPE_P (current_function_decl))
5123 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5124 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5126 else
5127 return virtual_incoming_args_rtx;
5130 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5131 This is called from dwarf2out.c to emit call frame instructions
5132 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5133 static void
5134 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5136 rtx unspec = SET_SRC (pattern);
5137 gcc_assert (GET_CODE (unspec) == UNSPEC);
5139 switch (index)
5141 case UNSPEC_REG_SAVE:
5142 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5143 SET_DEST (pattern));
5144 break;
5145 case UNSPEC_DEF_CFA:
5146 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5147 INTVAL (XVECEXP (unspec, 0, 0)));
5148 break;
5149 default:
5150 gcc_unreachable ();
5154 /* Expand the prologue into a bunch of separate insns. */
5156 void
5157 ix86_expand_prologue (void)
5159 rtx insn;
5160 bool pic_reg_used;
5161 struct ix86_frame frame;
5162 HOST_WIDE_INT allocate;
5164 ix86_compute_frame_layout (&frame);
5166 if (cfun->machine->force_align_arg_pointer)
5168 rtx x, y;
5170 /* Grab the argument pointer. */
5171 x = plus_constant (stack_pointer_rtx, 4);
5172 y = cfun->machine->force_align_arg_pointer;
5173 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5174 RTX_FRAME_RELATED_P (insn) = 1;
5176 /* The unwind info consists of two parts: install the fafp as the cfa,
5177 and record the fafp as the "save register" of the stack pointer.
5178 The latter is there in order that the unwinder can see where it
5179 should restore the stack pointer across the and insn. */
5180 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5181 x = gen_rtx_SET (VOIDmode, y, x);
5182 RTX_FRAME_RELATED_P (x) = 1;
5183 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5184 UNSPEC_REG_SAVE);
5185 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5186 RTX_FRAME_RELATED_P (y) = 1;
5187 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5188 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5189 REG_NOTES (insn) = x;
5191 /* Align the stack. */
5192 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5193 GEN_INT (-16)));
5195 /* And here we cheat like madmen with the unwind info. We force the
5196 cfa register back to sp+4, which is exactly what it was at the
5197 start of the function. Re-pushing the return address results in
5198 the return at the same spot relative to the cfa, and thus is
5199 correct wrt the unwind info. */
5200 x = cfun->machine->force_align_arg_pointer;
5201 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5202 insn = emit_insn (gen_push (x));
5203 RTX_FRAME_RELATED_P (insn) = 1;
5205 x = GEN_INT (4);
5206 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5207 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5208 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5209 REG_NOTES (insn) = x;
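/* A sketch of the code this block emits (illustrative only, assuming the
   %ecx scratch register picked by ix86_internal_arg_pointer above):
       leal   4(%esp), %ecx
       andl   $-16, %esp
       pushl  -4(%ecx)
   i.e. record the incoming argument pointer, align the stack, and re-push
   the return address so the frame below looks conventional.  */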
5212 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5213 slower on all targets. Also sdb doesn't like it. */
5215 if (frame_pointer_needed)
5217 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5218 RTX_FRAME_RELATED_P (insn) = 1;
5220 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5221 RTX_FRAME_RELATED_P (insn) = 1;
5224 allocate = frame.to_allocate;
5226 if (!frame.save_regs_using_mov)
5227 ix86_emit_save_regs ();
5228 else
5229 allocate += frame.nregs * UNITS_PER_WORD;
5231 /* When using the red zone we may start register saving before allocating
5232 the stack frame, saving one cycle of the prologue. */
5233 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5234 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5235 : stack_pointer_rtx,
5236 -frame.nregs * UNITS_PER_WORD);
5238 if (allocate == 0)
5240 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5241 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5242 GEN_INT (-allocate), -1);
5243 else
5245 /* Only valid for Win32. */
5246 rtx eax = gen_rtx_REG (SImode, 0);
5247 bool eax_live = ix86_eax_live_at_start_p ();
5248 rtx t;
5250 gcc_assert (!TARGET_64BIT);
5252 if (eax_live)
5254 emit_insn (gen_push (eax));
5255 allocate -= 4;
5258 emit_move_insn (eax, GEN_INT (allocate));
5260 insn = emit_insn (gen_allocate_stack_worker (eax));
5261 RTX_FRAME_RELATED_P (insn) = 1;
5262 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5263 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5264 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5265 t, REG_NOTES (insn));
5267 if (eax_live)
5269 if (frame_pointer_needed)
5270 t = plus_constant (hard_frame_pointer_rtx,
5271 allocate
5272 - frame.to_allocate
5273 - frame.nregs * UNITS_PER_WORD);
5274 else
5275 t = plus_constant (stack_pointer_rtx, allocate);
5276 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5280 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5282 if (!frame_pointer_needed || !frame.to_allocate)
5283 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5284 else
5285 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5286 -frame.nregs * UNITS_PER_WORD);
5289 pic_reg_used = false;
5290 if (pic_offset_table_rtx
5291 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5292 || current_function_profile))
5294 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5296 if (alt_pic_reg_used != INVALID_REGNUM)
5297 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5299 pic_reg_used = true;
5302 if (pic_reg_used)
5304 if (TARGET_64BIT)
5305 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5306 else
5307 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5309 /* Even with accurate pre-reload life analysis, we can wind up
5310 deleting all references to the pic register after reload.
5311 Consider if cross-jumping unifies two sides of a branch
5312 controlled by a comparison vs the only read from a global.
5313 In which case, allow the set_got to be deleted, though we're
5314 too late to do anything about the ebx save in the prologue. */
5315 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5318 /* Prevent function calls from being scheduled before the call to mcount.
5319 In the pic_reg_used case, make sure that the got load isn't deleted. */
5320 if (current_function_profile)
5321 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5324 /* Emit code to restore saved registers using MOV insns. First register
5325 is restored from POINTER + OFFSET. */
5326 static void
5327 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5328 int maybe_eh_return)
5330 int regno;
5331 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5333 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5334 if (ix86_save_reg (regno, maybe_eh_return))
5336 /* Ensure that adjust_address won't be forced to produce a pointer
5337 out of the range allowed by the x86-64 instruction set. */
5338 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5340 rtx r11;
5342 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5343 emit_move_insn (r11, GEN_INT (offset));
5344 emit_insn (gen_adddi3 (r11, r11, pointer));
5345 base_address = gen_rtx_MEM (Pmode, r11);
5346 offset = 0;
5348 emit_move_insn (gen_rtx_REG (Pmode, regno),
5349 adjust_address (base_address, Pmode, offset));
5350 offset += UNITS_PER_WORD;
5354 /* Restore function stack, frame, and registers. */
5356 void
5357 ix86_expand_epilogue (int style)
5359 int regno;
5360 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5361 struct ix86_frame frame;
5362 HOST_WIDE_INT offset;
5364 ix86_compute_frame_layout (&frame);
5366 /* Calculate start of saved registers relative to ebp. Special care
5367 must be taken for the normal return case of a function using
5368 eh_return: the eax and edx registers are marked as saved, but not
5369 restored along this path. */
5370 offset = frame.nregs;
5371 if (current_function_calls_eh_return && style != 2)
5372 offset -= 2;
5373 offset *= -UNITS_PER_WORD;
5375 /* If we're only restoring one register and sp is not valid, then
5376 use a move instruction to restore the register, since it's
5377 less work than reloading sp and popping the register.
5379 The default code results in a stack adjustment using an add/lea instruction,
5380 while this code results in a LEAVE instruction (or its discrete equivalent),
5381 so it is profitable in some other cases as well, especially when there
5382 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5383 is set and there is exactly one register to pop. This heuristic may need
5384 some tuning in the future. */
5385 if ((!sp_valid && frame.nregs <= 1)
5386 || (TARGET_EPILOGUE_USING_MOVE
5387 && cfun->machine->use_fast_prologue_epilogue
5388 && (frame.nregs > 1 || frame.to_allocate))
5389 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5390 || (frame_pointer_needed && TARGET_USE_LEAVE
5391 && cfun->machine->use_fast_prologue_epilogue
5392 && frame.nregs == 1)
5393 || current_function_calls_eh_return)
5395 /* Restore registers. We can use ebp or esp to address the memory
5396 locations. If both are available, default to ebp, since offsets
5397 are known to be small. The only exception is esp pointing directly to the
5398 end of the block of saved registers, where we may simplify the addressing
5399 mode. */
5401 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5402 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5403 frame.to_allocate, style == 2);
5404 else
5405 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5406 offset, style == 2);
5408 /* eh_return epilogues need %ecx added to the stack pointer. */
5409 if (style == 2)
5411 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5413 if (frame_pointer_needed)
5415 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5416 tmp = plus_constant (tmp, UNITS_PER_WORD);
5417 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5419 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5420 emit_move_insn (hard_frame_pointer_rtx, tmp);
5422 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5423 const0_rtx, style);
5425 else
5427 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5428 tmp = plus_constant (tmp, (frame.to_allocate
5429 + frame.nregs * UNITS_PER_WORD));
5430 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5433 else if (!frame_pointer_needed)
5434 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5435 GEN_INT (frame.to_allocate
5436 + frame.nregs * UNITS_PER_WORD),
5437 style);
5438 /* If not an i386, mov & pop is faster than "leave". */
5439 else if (TARGET_USE_LEAVE || optimize_size
5440 || !cfun->machine->use_fast_prologue_epilogue)
5441 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5442 else
5444 pro_epilogue_adjust_stack (stack_pointer_rtx,
5445 hard_frame_pointer_rtx,
5446 const0_rtx, style);
5447 if (TARGET_64BIT)
5448 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5449 else
5450 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5453 else
5455 /* First step is to deallocate the stack frame so that we can
5456 pop the registers. */
5457 if (!sp_valid)
5459 gcc_assert (frame_pointer_needed);
5460 pro_epilogue_adjust_stack (stack_pointer_rtx,
5461 hard_frame_pointer_rtx,
5462 GEN_INT (offset), style);
5464 else if (frame.to_allocate)
5465 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5466 GEN_INT (frame.to_allocate), style);
5468 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5469 if (ix86_save_reg (regno, false))
5471 if (TARGET_64BIT)
5472 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5473 else
5474 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5476 if (frame_pointer_needed)
5478 /* Leave results in shorter dependency chains on CPUs that are
5479 able to grok it fast. */
5480 if (TARGET_USE_LEAVE)
5481 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5482 else if (TARGET_64BIT)
5483 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5484 else
5485 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5489 if (cfun->machine->force_align_arg_pointer)
5491 emit_insn (gen_addsi3 (stack_pointer_rtx,
5492 cfun->machine->force_align_arg_pointer,
5493 GEN_INT (-4)));
5496 /* Sibcall epilogues don't want a return instruction. */
5497 if (style == 0)
5498 return;
5500 if (current_function_pops_args && current_function_args_size)
5502 rtx popc = GEN_INT (current_function_pops_args);
5504 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5505 return address, do an explicit add, and jump indirectly to the
5506 caller. */
5508 if (current_function_pops_args >= 65536)
5510 rtx ecx = gen_rtx_REG (SImode, 2);
5512 /* There is no "pascal" calling convention in 64bit ABI. */
5513 gcc_assert (!TARGET_64BIT);
5515 emit_insn (gen_popsi1 (ecx));
5516 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5517 emit_jump_insn (gen_return_indirect_internal (ecx));
5519 else
5520 emit_jump_insn (gen_return_pop_internal (popc));
5522 else
5523 emit_jump_insn (gen_return_internal ());
5526 /* Reset from the function's potential modifications. */
5528 static void
5529 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5530 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5532 if (pic_offset_table_rtx)
5533 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5536 /* Extract the parts of an RTL expression that is a valid memory address
5537 for an instruction. Return 0 if the structure of the address is
5538 grossly off. Return -1 if the address contains ASHIFT, so it is not
5539 strictly valid, but still used for computing the length of an lea instruction. */
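/* Illustrative sketch of the decomposition (A and B stand for arbitrary
   general registers): an address such as
   (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8)),
   i.e. 8(%A,%B,4) in AT&T syntax, yields base = A, index = B, scale = 4,
   disp = (const_int 8) and seg = SEG_DEFAULT.  */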
5542 ix86_decompose_address (rtx addr, struct ix86_address *out)
5544 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5545 rtx base_reg, index_reg;
5546 HOST_WIDE_INT scale = 1;
5547 rtx scale_rtx = NULL_RTX;
5548 int retval = 1;
5549 enum ix86_address_seg seg = SEG_DEFAULT;
5551 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5552 base = addr;
5553 else if (GET_CODE (addr) == PLUS)
5555 rtx addends[4], op;
5556 int n = 0, i;
5558 op = addr;
5561 if (n >= 4)
5562 return 0;
5563 addends[n++] = XEXP (op, 1);
5564 op = XEXP (op, 0);
5566 while (GET_CODE (op) == PLUS);
5567 if (n >= 4)
5568 return 0;
5569 addends[n] = op;
5571 for (i = n; i >= 0; --i)
5573 op = addends[i];
5574 switch (GET_CODE (op))
5576 case MULT:
5577 if (index)
5578 return 0;
5579 index = XEXP (op, 0);
5580 scale_rtx = XEXP (op, 1);
5581 break;
5583 case UNSPEC:
5584 if (XINT (op, 1) == UNSPEC_TP
5585 && TARGET_TLS_DIRECT_SEG_REFS
5586 && seg == SEG_DEFAULT)
5587 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5588 else
5589 return 0;
5590 break;
5592 case REG:
5593 case SUBREG:
5594 if (!base)
5595 base = op;
5596 else if (!index)
5597 index = op;
5598 else
5599 return 0;
5600 break;
5602 case CONST:
5603 case CONST_INT:
5604 case SYMBOL_REF:
5605 case LABEL_REF:
5606 if (disp)
5607 return 0;
5608 disp = op;
5609 break;
5611 default:
5612 return 0;
5616 else if (GET_CODE (addr) == MULT)
5618 index = XEXP (addr, 0); /* index*scale */
5619 scale_rtx = XEXP (addr, 1);
5621 else if (GET_CODE (addr) == ASHIFT)
5623 rtx tmp;
5625 /* We're called for lea too, which implements ashift on occasion. */
5626 index = XEXP (addr, 0);
5627 tmp = XEXP (addr, 1);
5628 if (GET_CODE (tmp) != CONST_INT)
5629 return 0;
5630 scale = INTVAL (tmp);
5631 if ((unsigned HOST_WIDE_INT) scale > 3)
5632 return 0;
5633 scale = 1 << scale;
5634 retval = -1;
5636 else
5637 disp = addr; /* displacement */
5639 /* Extract the integral value of scale. */
5640 if (scale_rtx)
5642 if (GET_CODE (scale_rtx) != CONST_INT)
5643 return 0;
5644 scale = INTVAL (scale_rtx);
5647 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5648 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5650 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5651 if (base_reg && index_reg && scale == 1
5652 && (index_reg == arg_pointer_rtx
5653 || index_reg == frame_pointer_rtx
5654 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5656 rtx tmp;
5657 tmp = base, base = index, index = tmp;
5658 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5661 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5662 if ((base_reg == hard_frame_pointer_rtx
5663 || base_reg == frame_pointer_rtx
5664 || base_reg == arg_pointer_rtx) && !disp)
5665 disp = const0_rtx;
5667 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5668 Avoid this by transforming to [%esi+0]. */
5669 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5670 && base_reg && !index_reg && !disp
5671 && REG_P (base_reg)
5672 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5673 disp = const0_rtx;
5675 /* Special case: encode reg+reg instead of reg*2. */
5676 if (!base && index && scale && scale == 2)
5677 base = index, base_reg = index_reg, scale = 1;
5679 /* Special case: scaling cannot be encoded without base or displacement. */
5680 if (!base && !disp && index && scale != 1)
5681 disp = const0_rtx;
5683 out->base = base;
5684 out->index = index;
5685 out->disp = disp;
5686 out->scale = scale;
5687 out->seg = seg;
5689 return retval;
5692 /* Return cost of the memory address x.
5693 For i386, it is better to use a complex address than let gcc copy
5694 the address into a reg and make a new pseudo. But not if the address
5695 requires two regs - that would mean more pseudos with longer
5696 lifetimes. */
5697 static int
5698 ix86_address_cost (rtx x)
5700 struct ix86_address parts;
5701 int cost = 1;
5702 int ok = ix86_decompose_address (x, &parts);
5704 gcc_assert (ok);
5706 if (parts.base && GET_CODE (parts.base) == SUBREG)
5707 parts.base = SUBREG_REG (parts.base);
5708 if (parts.index && GET_CODE (parts.index) == SUBREG)
5709 parts.index = SUBREG_REG (parts.index);
5711 /* More complex memory references are better. */
5712 if (parts.disp && parts.disp != const0_rtx)
5713 cost--;
5714 if (parts.seg != SEG_DEFAULT)
5715 cost--;
5717 /* Attempt to minimize number of registers in the address. */
5718 if ((parts.base
5719 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5720 || (parts.index
5721 && (!REG_P (parts.index)
5722 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5723 cost++;
5725 if (parts.base
5726 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5727 && parts.index
5728 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5729 && parts.base != parts.index)
5730 cost++;
5732 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5733 since its predecode logic can't detect the length of instructions
5734 and decoding degenerates to the vector decoder. Increase the cost of such
5735 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5736 to split such addresses or even refuse such addresses at all.
5738 The following addressing modes are affected:
5739 [base+scale*index]
5740 [scale*index+disp]
5741 [base+index]
5743 The first and last case may be avoidable by explicitly coding the zero in
5744 the memory address, but I don't have an AMD-K6 machine handy to check this
5745 theory. */
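/* For example (illustrative), (%eax,%ecx,2) and 4(,%ecx,2) both match the
   test below, while 4(%eax,%ecx,2) does not, since it has a base, an index
   and a displacement. */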
5747 if (TARGET_K6
5748 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5749 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5750 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5751 cost += 10;
5753 return cost;
5756 /* If X is a machine specific address (i.e. a symbol or label being
5757 referenced as a displacement from the GOT implemented using an
5758 UNSPEC), then return the base term. Otherwise return X. */
5761 ix86_find_base_term (rtx x)
5763 rtx term;
5765 if (TARGET_64BIT)
5767 if (GET_CODE (x) != CONST)
5768 return x;
5769 term = XEXP (x, 0);
5770 if (GET_CODE (term) == PLUS
5771 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5772 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5773 term = XEXP (term, 0);
5774 if (GET_CODE (term) != UNSPEC
5775 || XINT (term, 1) != UNSPEC_GOTPCREL)
5776 return x;
5778 term = XVECEXP (term, 0, 0);
5780 if (GET_CODE (term) != SYMBOL_REF
5781 && GET_CODE (term) != LABEL_REF)
5782 return x;
5784 return term;
5787 term = ix86_delegitimize_address (x);
5789 if (GET_CODE (term) != SYMBOL_REF
5790 && GET_CODE (term) != LABEL_REF)
5791 return x;
5793 return term;
5796 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5797 this is used to form addresses to local data when -fPIC is in
5798 use. */
5800 static bool
5801 darwin_local_data_pic (rtx disp)
5803 if (GET_CODE (disp) == MINUS)
5805 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5806 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5807 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5809 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5810 if (! strcmp (sym_name, "<pic base>"))
5811 return true;
5815 return false;
5818 /* Determine if a given RTX is a valid constant. We already know this
5819 satisfies CONSTANT_P. */
5821 bool
5822 legitimate_constant_p (rtx x)
5824 switch (GET_CODE (x))
5826 case CONST:
5827 x = XEXP (x, 0);
5829 if (GET_CODE (x) == PLUS)
5831 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5832 return false;
5833 x = XEXP (x, 0);
5836 if (TARGET_MACHO && darwin_local_data_pic (x))
5837 return true;
5839 /* Only some unspecs are valid as "constants". */
5840 if (GET_CODE (x) == UNSPEC)
5841 switch (XINT (x, 1))
5843 case UNSPEC_GOTOFF:
5844 return TARGET_64BIT;
5845 case UNSPEC_TPOFF:
5846 case UNSPEC_NTPOFF:
5847 x = XVECEXP (x, 0, 0);
5848 return (GET_CODE (x) == SYMBOL_REF
5849 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5850 case UNSPEC_DTPOFF:
5851 x = XVECEXP (x, 0, 0);
5852 return (GET_CODE (x) == SYMBOL_REF
5853 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5854 default:
5855 return false;
5858 /* We must have drilled down to a symbol. */
5859 if (GET_CODE (x) == LABEL_REF)
5860 return true;
5861 if (GET_CODE (x) != SYMBOL_REF)
5862 return false;
5863 /* FALLTHRU */
5865 case SYMBOL_REF:
5866 /* TLS symbols are never valid. */
5867 if (SYMBOL_REF_TLS_MODEL (x))
5868 return false;
5869 break;
5871 default:
5872 break;
5875 /* Otherwise we handle everything else in the move patterns. */
5876 return true;
5879 /* Determine if it's legal to put X into the constant pool. This
5880 is not possible for the address of thread-local symbols, which
5881 is checked above. */
5883 static bool
5884 ix86_cannot_force_const_mem (rtx x)
5886 return !legitimate_constant_p (x);
5889 /* Determine if a given RTX is a valid constant address. */
5891 bool
5892 constant_address_p (rtx x)
5894 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5897 /* Nonzero if the constant value X is a legitimate general operand
5898 when generating PIC code. It is given that flag_pic is on and
5899 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5901 bool
5902 legitimate_pic_operand_p (rtx x)
5904 rtx inner;
5906 switch (GET_CODE (x))
5908 case CONST:
5909 inner = XEXP (x, 0);
5910 if (GET_CODE (inner) == PLUS
5911 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5912 inner = XEXP (inner, 0);
5914 /* Only some unspecs are valid as "constants". */
5915 if (GET_CODE (inner) == UNSPEC)
5916 switch (XINT (inner, 1))
5918 case UNSPEC_GOTOFF:
5919 return TARGET_64BIT;
5920 case UNSPEC_TPOFF:
5921 x = XVECEXP (inner, 0, 0);
5922 return (GET_CODE (x) == SYMBOL_REF
5923 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5924 default:
5925 return false;
5927 /* FALLTHRU */
5929 case SYMBOL_REF:
5930 case LABEL_REF:
5931 return legitimate_pic_address_disp_p (x);
5933 default:
5934 return true;
5938 /* Determine if a given CONST RTX is a valid memory displacement
5939 in PIC mode. */
5942 legitimate_pic_address_disp_p (rtx disp)
5944 bool saw_plus;
5946 /* In 64bit mode we can allow direct addresses of symbols and labels
5947 when they are not dynamic symbols. */
5948 if (TARGET_64BIT)
5950 rtx op0 = disp, op1;
5952 switch (GET_CODE (disp))
5954 case LABEL_REF:
5955 return true;
5957 case CONST:
5958 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5959 break;
5960 op0 = XEXP (XEXP (disp, 0), 0);
5961 op1 = XEXP (XEXP (disp, 0), 1);
5962 if (GET_CODE (op1) != CONST_INT
5963 || INTVAL (op1) >= 16*1024*1024
5964 || INTVAL (op1) < -16*1024*1024)
5965 break;
5966 if (GET_CODE (op0) == LABEL_REF)
5967 return true;
5968 if (GET_CODE (op0) != SYMBOL_REF)
5969 break;
5970 /* FALLTHRU */
5972 case SYMBOL_REF:
5973 /* TLS references should always be enclosed in UNSPEC. */
5974 if (SYMBOL_REF_TLS_MODEL (op0))
5975 return false;
5976 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
5977 return true;
5978 break;
5980 default:
5981 break;
5984 if (GET_CODE (disp) != CONST)
5985 return 0;
5986 disp = XEXP (disp, 0);
5988 if (TARGET_64BIT)
5990 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5991 distance into the GOT tables. We should not need these anyway. */
5992 if (GET_CODE (disp) != UNSPEC
5993 || (XINT (disp, 1) != UNSPEC_GOTPCREL
5994 && XINT (disp, 1) != UNSPEC_GOTOFF))
5995 return 0;
5997 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5998 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5999 return 0;
6000 return 1;
6003 saw_plus = false;
6004 if (GET_CODE (disp) == PLUS)
6006 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6007 return 0;
6008 disp = XEXP (disp, 0);
6009 saw_plus = true;
6012 if (TARGET_MACHO && darwin_local_data_pic (disp))
6013 return 1;
6015 if (GET_CODE (disp) != UNSPEC)
6016 return 0;
6018 switch (XINT (disp, 1))
6020 case UNSPEC_GOT:
6021 if (saw_plus)
6022 return false;
6023 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6024 case UNSPEC_GOTOFF:
6025 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6026 While the ABI also specifies a 32bit relocation, we don't produce it in the
6027 small PIC model at all. */
6028 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6029 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6030 && !TARGET_64BIT)
6031 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6032 return false;
6033 case UNSPEC_GOTTPOFF:
6034 case UNSPEC_GOTNTPOFF:
6035 case UNSPEC_INDNTPOFF:
6036 if (saw_plus)
6037 return false;
6038 disp = XVECEXP (disp, 0, 0);
6039 return (GET_CODE (disp) == SYMBOL_REF
6040 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6041 case UNSPEC_NTPOFF:
6042 disp = XVECEXP (disp, 0, 0);
6043 return (GET_CODE (disp) == SYMBOL_REF
6044 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6045 case UNSPEC_DTPOFF:
6046 disp = XVECEXP (disp, 0, 0);
6047 return (GET_CODE (disp) == SYMBOL_REF
6048 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6051 return 0;
6054 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6055 memory address for an instruction. The MODE argument is the machine mode
6056 for the MEM expression that wants to use this address.
6058 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6059 convert common non-canonical forms to canonical form so that they will
6060 be recognized. */
6063 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6065 struct ix86_address parts;
6066 rtx base, index, disp;
6067 HOST_WIDE_INT scale;
6068 const char *reason = NULL;
6069 rtx reason_rtx = NULL_RTX;
6071 if (TARGET_DEBUG_ADDR)
6073 fprintf (stderr,
6074 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6075 GET_MODE_NAME (mode), strict);
6076 debug_rtx (addr);
6079 if (ix86_decompose_address (addr, &parts) <= 0)
6081 reason = "decomposition failed";
6082 goto report_error;
6085 base = parts.base;
6086 index = parts.index;
6087 disp = parts.disp;
6088 scale = parts.scale;
6090 /* Validate base register.
6092 Don't allow SUBREG's that span more than a word here. It can lead to spill
6093 failures when the base is one word out of a two word structure, which is
6094 represented internally as a DImode int. */
6096 if (base)
6098 rtx reg;
6099 reason_rtx = base;
6101 if (REG_P (base))
6102 reg = base;
6103 else if (GET_CODE (base) == SUBREG
6104 && REG_P (SUBREG_REG (base))
6105 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6106 <= UNITS_PER_WORD)
6107 reg = SUBREG_REG (base);
6108 else
6110 reason = "base is not a register";
6111 goto report_error;
6114 if (GET_MODE (base) != Pmode)
6116 reason = "base is not in Pmode";
6117 goto report_error;
6120 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6121 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6123 reason = "base is not valid";
6124 goto report_error;
6128 /* Validate index register.
6130 Don't allow SUBREG's that span more than a word here -- same as above. */
6132 if (index)
6134 rtx reg;
6135 reason_rtx = index;
6137 if (REG_P (index))
6138 reg = index;
6139 else if (GET_CODE (index) == SUBREG
6140 && REG_P (SUBREG_REG (index))
6141 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6142 <= UNITS_PER_WORD)
6143 reg = SUBREG_REG (index);
6144 else
6146 reason = "index is not a register";
6147 goto report_error;
6150 if (GET_MODE (index) != Pmode)
6152 reason = "index is not in Pmode";
6153 goto report_error;
6156 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6157 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6159 reason = "index is not valid";
6160 goto report_error;
6164 /* Validate scale factor. */
6165 if (scale != 1)
6167 reason_rtx = GEN_INT (scale);
6168 if (!index)
6170 reason = "scale without index";
6171 goto report_error;
6174 if (scale != 2 && scale != 4 && scale != 8)
6176 reason = "scale is not a valid multiplier";
6177 goto report_error;
6181 /* Validate displacement. */
6182 if (disp)
6184 reason_rtx = disp;
6186 if (GET_CODE (disp) == CONST
6187 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6188 switch (XINT (XEXP (disp, 0), 1))
6190 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6191 used. While the ABI also specifies 32bit relocations, we don't produce
6192 them at all and use IP relative instead. */
6193 case UNSPEC_GOT:
6194 case UNSPEC_GOTOFF:
6195 gcc_assert (flag_pic);
6196 if (!TARGET_64BIT)
6197 goto is_legitimate_pic;
6198 reason = "64bit address unspec";
6199 goto report_error;
6201 case UNSPEC_GOTPCREL:
6202 gcc_assert (flag_pic);
6203 goto is_legitimate_pic;
6205 case UNSPEC_GOTTPOFF:
6206 case UNSPEC_GOTNTPOFF:
6207 case UNSPEC_INDNTPOFF:
6208 case UNSPEC_NTPOFF:
6209 case UNSPEC_DTPOFF:
6210 break;
6212 default:
6213 reason = "invalid address unspec";
6214 goto report_error;
6217 else if (flag_pic && (SYMBOLIC_CONST (disp)
6218 #if TARGET_MACHO
6219 && !machopic_operand_p (disp)
6220 #endif
6223 is_legitimate_pic:
6224 if (TARGET_64BIT && (index || base))
6226 /* foo@dtpoff(%rX) is ok. */
6227 if (GET_CODE (disp) != CONST
6228 || GET_CODE (XEXP (disp, 0)) != PLUS
6229 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6230 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6231 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6232 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6234 reason = "non-constant pic memory reference";
6235 goto report_error;
6238 else if (! legitimate_pic_address_disp_p (disp))
6240 reason = "displacement is an invalid pic construct";
6241 goto report_error;
6244 /* This code used to verify that a symbolic pic displacement
6245 includes the pic_offset_table_rtx register.
6247 While this is a good idea, unfortunately these constructs may
6248 be created by the "adds using lea" optimization for incorrect
6249 code like:
6251 int a;
6252 int foo(int i)
6254 return *(&a+i);
6257 This code is nonsensical, but results in addressing the
6258 GOT table with a pic_offset_table_rtx base. We can't
6259 just refuse it easily, since it gets matched by the
6260 "addsi3" pattern, which later gets split to lea in case
6261 the output register differs from the input. While this
6262 could be handled by a separate addsi pattern for this case
6263 that never results in lea, disabling this test seems to be
6264 the easier and correct fix for the crash. */
6266 else if (GET_CODE (disp) != LABEL_REF
6267 && GET_CODE (disp) != CONST_INT
6268 && (GET_CODE (disp) != CONST
6269 || !legitimate_constant_p (disp))
6270 && (GET_CODE (disp) != SYMBOL_REF
6271 || !legitimate_constant_p (disp)))
6273 reason = "displacement is not constant";
6274 goto report_error;
6276 else if (TARGET_64BIT
6277 && !x86_64_immediate_operand (disp, VOIDmode))
6279 reason = "displacement is out of range";
6280 goto report_error;
6284 /* Everything looks valid. */
6285 if (TARGET_DEBUG_ADDR)
6286 fprintf (stderr, "Success.\n");
6287 return TRUE;
6289 report_error:
6290 if (TARGET_DEBUG_ADDR)
6292 fprintf (stderr, "Error: %s\n", reason);
6293 debug_rtx (reason_rtx);
6295 return FALSE;
6298 /* Return a unique alias set for the GOT. */
6300 static HOST_WIDE_INT
6301 ix86_GOT_alias_set (void)
6303 static HOST_WIDE_INT set = -1;
6304 if (set == -1)
6305 set = new_alias_set ();
6306 return set;
6309 /* Return a legitimate reference for ORIG (an address) using the
6310 register REG. If REG is 0, a new pseudo is generated.
6312 There are two types of references that must be handled:
6314 1. Global data references must load the address from the GOT, via
6315 the PIC reg. An insn is emitted to do this load, and the reg is
6316 returned.
6318 2. Static data references, constant pool addresses, and code labels
6319 compute the address as an offset from the GOT, whose base is in
6320 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6321 differentiate them from global data objects. The returned
6322 address is the PIC reg + an unspec constant.
6324 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6325 reg also appears in the address. */
6327 static rtx
6328 legitimize_pic_address (rtx orig, rtx reg)
6330 rtx addr = orig;
6331 rtx new = orig;
6332 rtx base;
6334 #if TARGET_MACHO
6335 if (reg == 0)
6336 reg = gen_reg_rtx (Pmode);
6337 /* Use the generic Mach-O PIC machinery. */
6338 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6339 #endif
6341 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6342 new = addr;
6343 else if (TARGET_64BIT
6344 && ix86_cmodel != CM_SMALL_PIC
6345 && local_symbolic_operand (addr, Pmode))
6347 rtx tmpreg;
6348 /* This symbol may be referenced via a displacement from the PIC
6349 base address (@GOTOFF). */
6351 if (reload_in_progress)
6352 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6353 if (GET_CODE (addr) == CONST)
6354 addr = XEXP (addr, 0);
6355 if (GET_CODE (addr) == PLUS)
6357 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6358 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6360 else
6361 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6362 new = gen_rtx_CONST (Pmode, new);
6363 if (!reg)
6364 tmpreg = gen_reg_rtx (Pmode);
6365 else
6366 tmpreg = reg;
6367 emit_move_insn (tmpreg, new);
6369 if (reg != 0)
6371 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6372 tmpreg, 1, OPTAB_DIRECT);
6373 new = reg;
6375 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6377 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6379 /* This symbol may be referenced via a displacement from the PIC
6380 base address (@GOTOFF). */
6382 if (reload_in_progress)
6383 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6384 if (GET_CODE (addr) == CONST)
6385 addr = XEXP (addr, 0);
6386 if (GET_CODE (addr) == PLUS)
6388 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6389 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6391 else
6392 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6393 new = gen_rtx_CONST (Pmode, new);
6394 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6396 if (reg != 0)
6398 emit_move_insn (reg, new);
6399 new = reg;
6402 else if (GET_CODE (addr) == SYMBOL_REF)
6404 if (TARGET_64BIT)
6406 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6407 new = gen_rtx_CONST (Pmode, new);
6408 new = gen_const_mem (Pmode, new);
6409 set_mem_alias_set (new, ix86_GOT_alias_set ());
6411 if (reg == 0)
6412 reg = gen_reg_rtx (Pmode);
6413 /* Use gen_movsi directly, otherwise the address is loaded
6414 into a register for CSE. We don't want to CSE these addresses;
6415 instead we CSE addresses from the GOT table, so skip this. */
6416 emit_insn (gen_movsi (reg, new));
6417 new = reg;
6419 else
6421 /* This symbol must be referenced via a load from the
6422 Global Offset Table (@GOT). */
6424 if (reload_in_progress)
6425 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6426 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6427 new = gen_rtx_CONST (Pmode, new);
6428 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6429 new = gen_const_mem (Pmode, new);
6430 set_mem_alias_set (new, ix86_GOT_alias_set ());
6432 if (reg == 0)
6433 reg = gen_reg_rtx (Pmode);
6434 emit_move_insn (reg, new);
6435 new = reg;
6438 else
6440 if (GET_CODE (addr) == CONST_INT
6441 && !x86_64_immediate_operand (addr, VOIDmode))
6443 if (reg)
6445 emit_move_insn (reg, addr);
6446 new = reg;
6448 else
6449 new = force_reg (Pmode, addr);
6451 else if (GET_CODE (addr) == CONST)
6453 addr = XEXP (addr, 0);
6455 /* We must match stuff we generate before. Assume the only
6456 unspecs that can get here are ours. Not that we could do
6457 anything with them anyway.... */
6458 if (GET_CODE (addr) == UNSPEC
6459 || (GET_CODE (addr) == PLUS
6460 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6461 return orig;
6462 gcc_assert (GET_CODE (addr) == PLUS);
6464 if (GET_CODE (addr) == PLUS)
6466 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6468 /* Check first to see if this is a constant offset from a @GOTOFF
6469 symbol reference. */
6470 if (local_symbolic_operand (op0, Pmode)
6471 && GET_CODE (op1) == CONST_INT)
6473 if (!TARGET_64BIT)
6475 if (reload_in_progress)
6476 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6477 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6478 UNSPEC_GOTOFF);
6479 new = gen_rtx_PLUS (Pmode, new, op1);
6480 new = gen_rtx_CONST (Pmode, new);
6481 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6483 if (reg != 0)
6485 emit_move_insn (reg, new);
6486 new = reg;
6489 else
6491 if (INTVAL (op1) < -16*1024*1024
6492 || INTVAL (op1) >= 16*1024*1024)
6494 if (!x86_64_immediate_operand (op1, Pmode))
6495 op1 = force_reg (Pmode, op1);
6496 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6500 else
6502 base = legitimize_pic_address (XEXP (addr, 0), reg);
6503 new = legitimize_pic_address (XEXP (addr, 1),
6504 base == reg ? NULL_RTX : reg);
6506 if (GET_CODE (new) == CONST_INT)
6507 new = plus_constant (base, INTVAL (new));
6508 else
6510 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6512 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6513 new = XEXP (new, 1);
6515 new = gen_rtx_PLUS (Pmode, base, new);
6520 return new;
6523 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6525 static rtx
6526 get_thread_pointer (int to_reg)
6528 rtx tp, reg, insn;
6530 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6531 if (!to_reg)
6532 return tp;
6534 reg = gen_reg_rtx (Pmode);
6535 insn = gen_rtx_SET (VOIDmode, reg, tp);
6536 insn = emit_insn (insn);
6538 return reg;
6541 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6542 false if we expect this to be used for a memory address and true if
6543 we expect to load the address into a register. */
6545 static rtx
6546 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6548 rtx dest, base, off, pic, tp;
6549 int type;
6551 switch (model)
6553 case TLS_MODEL_GLOBAL_DYNAMIC:
6554 dest = gen_reg_rtx (Pmode);
6555 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6557 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6559 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6561 start_sequence ();
6562 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6563 insns = get_insns ();
6564 end_sequence ();
6566 emit_libcall_block (insns, dest, rax, x);
6568 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6569 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6570 else
6571 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6573 if (TARGET_GNU2_TLS)
6575 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6577 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6579 break;
6581 case TLS_MODEL_LOCAL_DYNAMIC:
6582 base = gen_reg_rtx (Pmode);
6583 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6585 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6587 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6589 start_sequence ();
6590 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6591 insns = get_insns ();
6592 end_sequence ();
6594 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6595 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6596 emit_libcall_block (insns, base, rax, note);
6598 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6599 emit_insn (gen_tls_local_dynamic_base_64 (base));
6600 else
6601 emit_insn (gen_tls_local_dynamic_base_32 (base));
6603 if (TARGET_GNU2_TLS)
6605 rtx x = ix86_tls_module_base ();
6607 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
6609 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6612 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6613 off = gen_rtx_CONST (Pmode, off);
6615 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6616 break;
6618 case TLS_MODEL_INITIAL_EXEC:
6619 if (TARGET_64BIT)
6621 pic = NULL;
6622 type = UNSPEC_GOTNTPOFF;
6624 else if (flag_pic)
6626 if (reload_in_progress)
6627 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6628 pic = pic_offset_table_rtx;
6629 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6631 else if (!TARGET_ANY_GNU_TLS)
6633 pic = gen_reg_rtx (Pmode);
6634 emit_insn (gen_set_got (pic));
6635 type = UNSPEC_GOTTPOFF;
6637 else
6639 pic = NULL;
6640 type = UNSPEC_INDNTPOFF;
6643 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6644 off = gen_rtx_CONST (Pmode, off);
6645 if (pic)
6646 off = gen_rtx_PLUS (Pmode, pic, off);
6647 off = gen_const_mem (Pmode, off);
6648 set_mem_alias_set (off, ix86_GOT_alias_set ());
6650 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6652 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6653 off = force_reg (Pmode, off);
6654 return gen_rtx_PLUS (Pmode, base, off);
6656 else
6658 base = get_thread_pointer (true);
6659 dest = gen_reg_rtx (Pmode);
6660 emit_insn (gen_subsi3 (dest, base, off));
6662 break;
6664 case TLS_MODEL_LOCAL_EXEC:
6665 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6666 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6667 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6668 off = gen_rtx_CONST (Pmode, off);
6670 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6672 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6673 return gen_rtx_PLUS (Pmode, base, off);
6675 else
6677 base = get_thread_pointer (true);
6678 dest = gen_reg_rtx (Pmode);
6679 emit_insn (gen_subsi3 (dest, base, off));
6681 break;
6683 default:
6684 gcc_unreachable ();
6687 return dest;
6690 /* Try machine-dependent ways of modifying an illegitimate address
6691 to be legitimate. If we find one, return the new, valid address.
6692 This macro is used in only one place: `memory_address' in explow.c.
6694 OLDX is the address as it was before break_out_memory_refs was called.
6695 In some cases it is useful to look at this to decide what needs to be done.
6697 MODE and WIN are passed so that this macro can use
6698 GO_IF_LEGITIMATE_ADDRESS.
6700 It is always safe for this macro to do nothing. It exists to recognize
6701 opportunities to optimize the output.
6703 For the 80386, we handle X+REG by loading X into a register R and
6704 using R+REG. R will go in a general reg and indexing will be used.
6705 However, if REG is a broken-out memory address or multiplication,
6706 nothing needs to be done because REG can certainly go in a general reg.
6708 When -fpic is used, special handling is needed for symbolic references.
6709 See comments by legitimize_pic_address in i386.c for details. */
6712 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6714 int changed = 0;
6715 unsigned log;
6717 if (TARGET_DEBUG_ADDR)
6719 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6720 GET_MODE_NAME (mode));
6721 debug_rtx (x);
6724 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6725 if (log)
6726 return legitimize_tls_address (x, log, false);
6727 if (GET_CODE (x) == CONST
6728 && GET_CODE (XEXP (x, 0)) == PLUS
6729 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6730 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6732 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6733 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6736 if (flag_pic && SYMBOLIC_CONST (x))
6737 return legitimize_pic_address (x, 0);
6739 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6740 if (GET_CODE (x) == ASHIFT
6741 && GET_CODE (XEXP (x, 1)) == CONST_INT
6742 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6744 changed = 1;
6745 log = INTVAL (XEXP (x, 1));
6746 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6747 GEN_INT (1 << log));
6750 if (GET_CODE (x) == PLUS)
6752 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6754 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6755 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6756 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6758 changed = 1;
6759 log = INTVAL (XEXP (XEXP (x, 0), 1));
6760 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6761 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6762 GEN_INT (1 << log));
6765 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6766 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6767 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6769 changed = 1;
6770 log = INTVAL (XEXP (XEXP (x, 1), 1));
6771 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6772 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6773 GEN_INT (1 << log));
6776 /* Put multiply first if it isn't already. */
6777 if (GET_CODE (XEXP (x, 1)) == MULT)
6779 rtx tmp = XEXP (x, 0);
6780 XEXP (x, 0) = XEXP (x, 1);
6781 XEXP (x, 1) = tmp;
6782 changed = 1;
6785 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6786 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6787 created by virtual register instantiation, register elimination, and
6788 similar optimizations. */
6789 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6791 changed = 1;
6792 x = gen_rtx_PLUS (Pmode,
6793 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6794 XEXP (XEXP (x, 1), 0)),
6795 XEXP (XEXP (x, 1), 1));
6798 /* Canonicalize
6799 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6800 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6801 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6802 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6803 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6804 && CONSTANT_P (XEXP (x, 1)))
6806 rtx constant;
6807 rtx other = NULL_RTX;
6809 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6811 constant = XEXP (x, 1);
6812 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6814 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6816 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6817 other = XEXP (x, 1);
6819 else
6820 constant = 0;
6822 if (constant)
6824 changed = 1;
6825 x = gen_rtx_PLUS (Pmode,
6826 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6827 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6828 plus_constant (other, INTVAL (constant)));
6832 if (changed && legitimate_address_p (mode, x, FALSE))
6833 return x;
6835 if (GET_CODE (XEXP (x, 0)) == MULT)
6837 changed = 1;
6838 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6841 if (GET_CODE (XEXP (x, 1)) == MULT)
6843 changed = 1;
6844 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6847 if (changed
6848 && GET_CODE (XEXP (x, 1)) == REG
6849 && GET_CODE (XEXP (x, 0)) == REG)
6850 return x;
6852 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6854 changed = 1;
6855 x = legitimize_pic_address (x, 0);
6858 if (changed && legitimate_address_p (mode, x, FALSE))
6859 return x;
6861 if (GET_CODE (XEXP (x, 0)) == REG)
6863 rtx temp = gen_reg_rtx (Pmode);
6864 rtx val = force_operand (XEXP (x, 1), temp);
6865 if (val != temp)
6866 emit_move_insn (temp, val);
6868 XEXP (x, 1) = temp;
6869 return x;
6872 else if (GET_CODE (XEXP (x, 1)) == REG)
6874 rtx temp = gen_reg_rtx (Pmode);
6875 rtx val = force_operand (XEXP (x, 0), temp);
6876 if (val != temp)
6877 emit_move_insn (temp, val);
6879 XEXP (x, 0) = temp;
6880 return x;
6884 return x;
6887 /* Print an integer constant expression in assembler syntax. Addition
6888 and subtraction are the only arithmetic that may appear in these
6889 expressions. FILE is the stdio stream to write to, X is the rtx, and
6890 CODE is the operand print code from the output string. */
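/* E.g. (illustrative) the operand
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   is printed as "foo@GOTOFF". */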
6892 static void
6893 output_pic_addr_const (FILE *file, rtx x, int code)
6895 char buf[256];
6897 switch (GET_CODE (x))
6899 case PC:
6900 gcc_assert (flag_pic);
6901 putc ('.', file);
6902 break;
6904 case SYMBOL_REF:
6905 output_addr_const (file, x);
6906 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6907 fputs ("@PLT", file);
6908 break;
6910 case LABEL_REF:
6911 x = XEXP (x, 0);
6912 /* FALLTHRU */
6913 case CODE_LABEL:
6914 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6915 assemble_name (asm_out_file, buf);
6916 break;
6918 case CONST_INT:
6919 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6920 break;
6922 case CONST:
6923 /* This used to output parentheses around the expression,
6924 but that does not work on the 386 (either ATT or BSD assembler). */
6925 output_pic_addr_const (file, XEXP (x, 0), code);
6926 break;
6928 case CONST_DOUBLE:
6929 if (GET_MODE (x) == VOIDmode)
6931 /* We can use %d if the number is <32 bits and positive. */
6932 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6933 fprintf (file, "0x%lx%08lx",
6934 (unsigned long) CONST_DOUBLE_HIGH (x),
6935 (unsigned long) CONST_DOUBLE_LOW (x));
6936 else
6937 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6939 else
6940 /* We can't handle floating point constants;
6941 PRINT_OPERAND must handle them. */
6942 output_operand_lossage ("floating constant misused");
6943 break;
6945 case PLUS:
6946 /* Some assemblers need integer constants to appear first. */
6947 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6949 output_pic_addr_const (file, XEXP (x, 0), code);
6950 putc ('+', file);
6951 output_pic_addr_const (file, XEXP (x, 1), code);
6953 else
6955 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6956 output_pic_addr_const (file, XEXP (x, 1), code);
6957 putc ('+', file);
6958 output_pic_addr_const (file, XEXP (x, 0), code);
6960 break;
6962 case MINUS:
6963 if (!TARGET_MACHO)
6964 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6965 output_pic_addr_const (file, XEXP (x, 0), code);
6966 putc ('-', file);
6967 output_pic_addr_const (file, XEXP (x, 1), code);
6968 if (!TARGET_MACHO)
6969 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6970 break;
6972 case UNSPEC:
6973 gcc_assert (XVECLEN (x, 0) == 1);
6974 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6975 switch (XINT (x, 1))
6977 case UNSPEC_GOT:
6978 fputs ("@GOT", file);
6979 break;
6980 case UNSPEC_GOTOFF:
6981 fputs ("@GOTOFF", file);
6982 break;
6983 case UNSPEC_GOTPCREL:
6984 fputs ("@GOTPCREL(%rip)", file);
6985 break;
6986 case UNSPEC_GOTTPOFF:
6987 /* FIXME: This might be @TPOFF in Sun ld too. */
6988 fputs ("@GOTTPOFF", file);
6989 break;
6990 case UNSPEC_TPOFF:
6991 fputs ("@TPOFF", file);
6992 break;
6993 case UNSPEC_NTPOFF:
6994 if (TARGET_64BIT)
6995 fputs ("@TPOFF", file);
6996 else
6997 fputs ("@NTPOFF", file);
6998 break;
6999 case UNSPEC_DTPOFF:
7000 fputs ("@DTPOFF", file);
7001 break;
7002 case UNSPEC_GOTNTPOFF:
7003 if (TARGET_64BIT)
7004 fputs ("@GOTTPOFF(%rip)", file);
7005 else
7006 fputs ("@GOTNTPOFF", file);
7007 break;
7008 case UNSPEC_INDNTPOFF:
7009 fputs ("@INDNTPOFF", file);
7010 break;
7011 default:
7012 output_operand_lossage ("invalid UNSPEC as operand");
7013 break;
7015 break;
7017 default:
7018 output_operand_lossage ("invalid expression as operand");
7022 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7023 We need to emit DTP-relative relocations. */
7025 static void
7026 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7028 fputs (ASM_LONG, file);
7029 output_addr_const (file, x);
7030 fputs ("@DTPOFF", file);
7031 switch (size)
7033 case 4:
7034 break;
7035 case 8:
7036 fputs (", 0", file);
7037 break;
7038 default:
7039 gcc_unreachable ();
7043 /* In the name of slightly smaller debug output, and to cater to
7044 general assembler lossage, recognize PIC+GOTOFF and turn it back
7045 into a direct symbol reference. */
7047 static rtx
7048 ix86_delegitimize_address (rtx orig_x)
7050 rtx x = orig_x, y;
7052 if (GET_CODE (x) == MEM)
7053 x = XEXP (x, 0);
7055 if (TARGET_64BIT)
7057 if (GET_CODE (x) != CONST
7058 || GET_CODE (XEXP (x, 0)) != UNSPEC
7059 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7060 || GET_CODE (orig_x) != MEM)
7061 return orig_x;
7062 return XVECEXP (XEXP (x, 0), 0, 0);
7065 if (GET_CODE (x) != PLUS
7066 || GET_CODE (XEXP (x, 1)) != CONST)
7067 return orig_x;
7069 if (GET_CODE (XEXP (x, 0)) == REG
7070 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7071 /* %ebx + GOT/GOTOFF */
7072 y = NULL;
7073 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7075 /* %ebx + %reg * scale + GOT/GOTOFF */
7076 y = XEXP (x, 0);
7077 if (GET_CODE (XEXP (y, 0)) == REG
7078 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7079 y = XEXP (y, 1);
7080 else if (GET_CODE (XEXP (y, 1)) == REG
7081 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7082 y = XEXP (y, 0);
7083 else
7084 return orig_x;
7085 if (GET_CODE (y) != REG
7086 && GET_CODE (y) != MULT
7087 && GET_CODE (y) != ASHIFT)
7088 return orig_x;
7090 else
7091 return orig_x;
7093 x = XEXP (XEXP (x, 1), 0);
7094 if (GET_CODE (x) == UNSPEC
7095 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7096 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7098 if (y)
7099 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7100 return XVECEXP (x, 0, 0);
7103 if (GET_CODE (x) == PLUS
7104 && GET_CODE (XEXP (x, 0)) == UNSPEC
7105 && GET_CODE (XEXP (x, 1)) == CONST_INT
7106 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7107 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7108 && GET_CODE (orig_x) != MEM)))
7110 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7111 if (y)
7112 return gen_rtx_PLUS (Pmode, y, x);
7113 return x;
7116 return orig_x;
7119 static void
7120 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7121 int fp, FILE *file)
7123 const char *suffix;
7125 if (mode == CCFPmode || mode == CCFPUmode)
7127 enum rtx_code second_code, bypass_code;
7128 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7129 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7130 code = ix86_fp_compare_code_to_integer (code);
7131 mode = CCmode;
7133 if (reverse)
7134 code = reverse_condition (code);
7136 switch (code)
7138 case EQ:
7139 suffix = "e";
7140 break;
7141 case NE:
7142 suffix = "ne";
7143 break;
7144 case GT:
7145 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7146 suffix = "g";
7147 break;
7148 case GTU:
7149 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7150 Those same assemblers have the same but opposite lossage on cmov. */
7151 gcc_assert (mode == CCmode);
7152 suffix = fp ? "nbe" : "a";
7153 break;
7154 case LT:
7155 switch (mode)
7157 case CCNOmode:
7158 case CCGOCmode:
7159 suffix = "s";
7160 break;
7162 case CCmode:
7163 case CCGCmode:
7164 suffix = "l";
7165 break;
7167 default:
7168 gcc_unreachable ();
7170 break;
7171 case LTU:
7172 gcc_assert (mode == CCmode);
7173 suffix = "b";
7174 break;
7175 case GE:
7176 switch (mode)
7178 case CCNOmode:
7179 case CCGOCmode:
7180 suffix = "ns";
7181 break;
7183 case CCmode:
7184 case CCGCmode:
7185 suffix = "ge";
7186 break;
7188 default:
7189 gcc_unreachable ();
7191 break;
7192 case GEU:
7193 /* ??? As above. */
7194 gcc_assert (mode == CCmode);
7195 suffix = fp ? "nb" : "ae";
7196 break;
7197 case LE:
7198 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7199 suffix = "le";
7200 break;
7201 case LEU:
7202 gcc_assert (mode == CCmode);
7203 suffix = "be";
7204 break;
7205 case UNORDERED:
7206 suffix = fp ? "u" : "p";
7207 break;
7208 case ORDERED:
7209 suffix = fp ? "nu" : "np";
7210 break;
7211 default:
7212 gcc_unreachable ();
7214 fputs (suffix, file);
7217 /* Print the name of register X to FILE based on its machine mode and number.
7218 If CODE is 'w', pretend the mode is HImode.
7219 If CODE is 'b', pretend the mode is QImode.
7220 If CODE is 'k', pretend the mode is SImode.
7221 If CODE is 'q', pretend the mode is DImode.
7222 If CODE is 'h', pretend the reg is the 'high' byte register.
7223 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
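/* For illustration: with X = (reg:SI 0) and AT&T syntax, code 'b' prints
   "%al", 'h' prints "%ah", 'w' prints "%ax", no code prints "%eax" and,
   on 64-bit targets, 'q' prints "%rax". */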
7225 void
7226 print_reg (rtx x, int code, FILE *file)
7228 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7229 && REGNO (x) != FRAME_POINTER_REGNUM
7230 && REGNO (x) != FLAGS_REG
7231 && REGNO (x) != FPSR_REG);
7233 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7234 putc ('%', file);
7236 if (code == 'w' || MMX_REG_P (x))
7237 code = 2;
7238 else if (code == 'b')
7239 code = 1;
7240 else if (code == 'k')
7241 code = 4;
7242 else if (code == 'q')
7243 code = 8;
7244 else if (code == 'y')
7245 code = 3;
7246 else if (code == 'h')
7247 code = 0;
7248 else
7249 code = GET_MODE_SIZE (GET_MODE (x));
7251 /* Irritatingly, AMD extended registers use a different naming convention
7252 from the normal registers. */
7253 if (REX_INT_REG_P (x))
7255 gcc_assert (TARGET_64BIT);
7256 switch (code)
7258 case 0:
7259 error ("extended registers have no high halves");
7260 break;
7261 case 1:
7262 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7263 break;
7264 case 2:
7265 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7266 break;
7267 case 4:
7268 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7269 break;
7270 case 8:
7271 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7272 break;
7273 default:
7274 error ("unsupported operand size for extended register");
7275 break;
7277 return;
7279 switch (code)
7281 case 3:
7282 if (STACK_TOP_P (x))
7284 fputs ("st(0)", file);
7285 break;
7287 /* FALLTHRU */
7288 case 8:
7289 case 4:
7290 case 12:
7291 if (! ANY_FP_REG_P (x))
7292 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7293 /* FALLTHRU */
7294 case 16:
7295 case 2:
7296 normal:
7297 fputs (hi_reg_name[REGNO (x)], file);
7298 break;
7299 case 1:
7300 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7301 goto normal;
7302 fputs (qi_reg_name[REGNO (x)], file);
7303 break;
7304 case 0:
7305 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7306 goto normal;
7307 fputs (qi_high_reg_name[REGNO (x)], file);
7308 break;
7309 default:
7310 gcc_unreachable ();
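/* As an illustration: for the ax register in AT&T syntax, code 'b' prints
   "%al", 'w' prints "%ax", 'k' prints "%eax" and 'h' prints "%ah"; for a
   REX register such as r8, an SImode reference prints "%r8d".  */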
7314 /* Locate some local-dynamic symbol still in use by this function
7315 so that we can print its name in some tls_local_dynamic_base
7316 pattern. */
7318 static const char *
7319 get_some_local_dynamic_name (void)
7321 rtx insn;
7323 if (cfun->machine->some_ld_name)
7324 return cfun->machine->some_ld_name;
7326 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7327 if (INSN_P (insn)
7328 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7329 return cfun->machine->some_ld_name;
7331 gcc_unreachable ();
7334 static int
7335 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7337 rtx x = *px;
7339 if (GET_CODE (x) == SYMBOL_REF
7340 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7342 cfun->machine->some_ld_name = XSTR (x, 0);
7343 return 1;
7346 return 0;
7349 /* Meaning of CODE:
7350 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7351 C -- print opcode suffix for set/cmov insn.
7352 c -- like C, but print reversed condition
7353 F,f -- likewise, but for floating-point.
7354 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7355 otherwise nothing
7356 R -- print the prefix for register names.
7357 z -- print the opcode suffix for the size of the current operand.
7358 * -- print a star (in certain assembler syntax)
7359 A -- print an absolute memory reference.
7360 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7361 s -- print a shift double count, followed by the assembler's argument
7362 delimiter.
7363 b -- print the QImode name of the register for the indicated operand.
7364 %b0 would print %al if operands[0] is reg 0.
7365 w -- likewise, print the HImode name of the register.
7366 k -- likewise, print the SImode name of the register.
7367 q -- likewise, print the DImode name of the register.
7368 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7369 y -- print "st(0)" instead of "st" as a register.
7370 D -- print condition for SSE cmp instruction.
7371 P -- if PIC, print an @PLT suffix.
7372 X -- don't print any sort of PIC '@' suffix for a symbol.
7373 & -- print some in-use local-dynamic symbol name.
7374 H -- print a memory address offset by 8; used for sse high-parts
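For instance, %z applied to an SFmode memory operand emits "s" and to a
DFmode operand emits "l", turning a bare "fld" template into "flds" or
"fldl" as appropriate.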
7377 void
7378 print_operand (FILE *file, rtx x, int code)
7380 if (code)
7382 switch (code)
7384 case '*':
7385 if (ASSEMBLER_DIALECT == ASM_ATT)
7386 putc ('*', file);
7387 return;
7389 case '&':
7390 assemble_name (file, get_some_local_dynamic_name ());
7391 return;
7393 case 'A':
7394 switch (ASSEMBLER_DIALECT)
7396 case ASM_ATT:
7397 putc ('*', file);
7398 break;
7400 case ASM_INTEL:
7401 /* Intel syntax. For absolute addresses, registers should not
7402 be surrounded by brackets. */
7403 if (GET_CODE (x) != REG)
7405 putc ('[', file);
7406 PRINT_OPERAND (file, x, 0);
7407 putc (']', file);
7408 return;
7410 break;
7412 default:
7413 gcc_unreachable ();
7416 PRINT_OPERAND (file, x, 0);
7417 return;
7420 case 'L':
7421 if (ASSEMBLER_DIALECT == ASM_ATT)
7422 putc ('l', file);
7423 return;
7425 case 'W':
7426 if (ASSEMBLER_DIALECT == ASM_ATT)
7427 putc ('w', file);
7428 return;
7430 case 'B':
7431 if (ASSEMBLER_DIALECT == ASM_ATT)
7432 putc ('b', file);
7433 return;
7435 case 'Q':
7436 if (ASSEMBLER_DIALECT == ASM_ATT)
7437 putc ('l', file);
7438 return;
7440 case 'S':
7441 if (ASSEMBLER_DIALECT == ASM_ATT)
7442 putc ('s', file);
7443 return;
7445 case 'T':
7446 if (ASSEMBLER_DIALECT == ASM_ATT)
7447 putc ('t', file);
7448 return;
7450 case 'z':
7451 /* 387 opcodes don't get size suffixes if the operands are
7452 registers. */
7453 if (STACK_REG_P (x))
7454 return;
7456 /* Likewise if using Intel opcodes. */
7457 if (ASSEMBLER_DIALECT == ASM_INTEL)
7458 return;
7460 /* Derive the opcode suffix from the size of the operand. */
7461 switch (GET_MODE_SIZE (GET_MODE (x)))
7463 case 2:
7464 #ifdef HAVE_GAS_FILDS_FISTS
7465 putc ('s', file);
7466 #endif
7467 return;
7469 case 4:
7470 if (GET_MODE (x) == SFmode)
7472 putc ('s', file);
7473 return;
7475 else
7476 putc ('l', file);
7477 return;
7479 case 12:
7480 case 16:
7481 putc ('t', file);
7482 return;
7484 case 8:
7485 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7487 #ifdef GAS_MNEMONICS
7488 putc ('q', file);
7489 #else
7490 putc ('l', file);
7491 putc ('l', file);
7492 #endif
7494 else
7495 putc ('l', file);
7496 return;
7498 default:
7499 gcc_unreachable ();
7502 case 'b':
7503 case 'w':
7504 case 'k':
7505 case 'q':
7506 case 'h':
7507 case 'y':
7508 case 'X':
7509 case 'P':
7510 break;
7512 case 's':
7513 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7515 PRINT_OPERAND (file, x, 0);
7516 putc (',', file);
7518 return;
7520 case 'D':
7521 /* A little bit of braindamage here. The SSE compare instructions
7522 use completely different names for the comparisons than the
7523 fp conditional moves do. */
7524 switch (GET_CODE (x))
7526 case EQ:
7527 case UNEQ:
7528 fputs ("eq", file);
7529 break;
7530 case LT:
7531 case UNLT:
7532 fputs ("lt", file);
7533 break;
7534 case LE:
7535 case UNLE:
7536 fputs ("le", file);
7537 break;
7538 case UNORDERED:
7539 fputs ("unord", file);
7540 break;
7541 case NE:
7542 case LTGT:
7543 fputs ("neq", file);
7544 break;
7545 case UNGE:
7546 case GE:
7547 fputs ("nlt", file);
7548 break;
7549 case UNGT:
7550 case GT:
7551 fputs ("nle", file);
7552 break;
7553 case ORDERED:
7554 fputs ("ord", file);
7555 break;
7556 default:
7557 gcc_unreachable ();
7559 return;
7560 case 'O':
7561 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7562 if (ASSEMBLER_DIALECT == ASM_ATT)
7564 switch (GET_MODE (x))
7566 case HImode: putc ('w', file); break;
7567 case SImode:
7568 case SFmode: putc ('l', file); break;
7569 case DImode:
7570 case DFmode: putc ('q', file); break;
7571 default: gcc_unreachable ();
7573 putc ('.', file);
7575 #endif
7576 return;
7577 case 'C':
7578 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7579 return;
7580 case 'F':
7581 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7582 if (ASSEMBLER_DIALECT == ASM_ATT)
7583 putc ('.', file);
7584 #endif
7585 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7586 return;
7588 /* Like above, but reverse condition */
7589 case 'c':
7590 /* Check to see if argument to %c is really a constant
7591 and not a condition code which needs to be reversed. */
7592 if (!COMPARISON_P (x))
7594 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7595 return;
7597 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7598 return;
7599 case 'f':
7600 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7601 if (ASSEMBLER_DIALECT == ASM_ATT)
7602 putc ('.', file);
7603 #endif
7604 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7605 return;
7607 case 'H':
7608 /* It doesn't actually matter what mode we use here, as we're
7609 only going to use this for printing. */
7610 x = adjust_address_nv (x, DImode, 8);
7611 break;
7613 case '+':
7615 rtx x;
7617 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7618 return;
7620 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7621 if (x)
7623 int pred_val = INTVAL (XEXP (x, 0));
7625 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7626 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7628 int taken = pred_val > REG_BR_PROB_BASE / 2;
7629 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7631 /* Emit hints only when the default branch prediction
7632 heuristics would fail. */
7633 if (taken != cputaken)
7635 /* We use 3e (DS) prefix for taken branches and
7636 2e (CS) prefix for not taken branches. */
7637 if (taken)
7638 fputs ("ds ; ", file);
7639 else
7640 fputs ("cs ; ", file);
7644 return;
7646 default:
7647 output_operand_lossage ("invalid operand code '%c'", code);
7651 if (GET_CODE (x) == REG)
7652 print_reg (x, code, file);
7654 else if (GET_CODE (x) == MEM)
7656 /* No `byte ptr' prefix for call instructions. */
7657 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7659 const char * size;
7660 switch (GET_MODE_SIZE (GET_MODE (x)))
7662 case 1: size = "BYTE"; break;
7663 case 2: size = "WORD"; break;
7664 case 4: size = "DWORD"; break;
7665 case 8: size = "QWORD"; break;
7666 case 12: size = "XWORD"; break;
7667 case 16: size = "XMMWORD"; break;
7668 default:
7669 gcc_unreachable ();
7672 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7673 if (code == 'b')
7674 size = "BYTE";
7675 else if (code == 'w')
7676 size = "WORD";
7677 else if (code == 'k')
7678 size = "DWORD";
7680 fputs (size, file);
7681 fputs (" PTR ", file);
7684 x = XEXP (x, 0);
7685 /* Avoid (%rip) for call operands. */
7686 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7687 && GET_CODE (x) != CONST_INT)
7688 output_addr_const (file, x);
7689 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7690 output_operand_lossage ("invalid constraints for operand");
7691 else
7692 output_address (x);
7695 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7697 REAL_VALUE_TYPE r;
7698 long l;
7700 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7701 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7703 if (ASSEMBLER_DIALECT == ASM_ATT)
7704 putc ('$', file);
7705 fprintf (file, "0x%08lx", l);
7708 /* These float cases don't actually occur as immediate operands. */
7709 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7711 char dstr[30];
7713 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7714 fprintf (file, "%s", dstr);
7717 else if (GET_CODE (x) == CONST_DOUBLE
7718 && GET_MODE (x) == XFmode)
7720 char dstr[30];
7722 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7723 fprintf (file, "%s", dstr);
7726 else
7728 /* We have patterns that allow zero sets of memory, for instance.
7729 In 64-bit mode, we should probably support all 8-byte vectors,
7730 since we can in fact encode that into an immediate. */
7731 if (GET_CODE (x) == CONST_VECTOR)
7733 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7734 x = const0_rtx;
7737 if (code != 'P')
7739 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7741 if (ASSEMBLER_DIALECT == ASM_ATT)
7742 putc ('$', file);
7744 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7745 || GET_CODE (x) == LABEL_REF)
7747 if (ASSEMBLER_DIALECT == ASM_ATT)
7748 putc ('$', file);
7749 else
7750 fputs ("OFFSET FLAT:", file);
7753 if (GET_CODE (x) == CONST_INT)
7754 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7755 else if (flag_pic)
7756 output_pic_addr_const (file, x, code);
7757 else
7758 output_addr_const (file, x);
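/* A couple of concrete cases: a CONST_INT 42 with no code prints "$42" in
   AT&T syntax and "42" in Intel syntax; an SImode MEM in Intel syntax is
   preceded by "DWORD PTR "; a comparison printed with %C yields the
   setcc/cmov suffix chosen by put_condition_code above.  */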
7762 /* Print a memory operand whose address is ADDR. */
7764 void
7765 print_operand_address (FILE *file, rtx addr)
7767 struct ix86_address parts;
7768 rtx base, index, disp;
7769 int scale;
7770 int ok = ix86_decompose_address (addr, &parts);
7772 gcc_assert (ok);
7774 base = parts.base;
7775 index = parts.index;
7776 disp = parts.disp;
7777 scale = parts.scale;
7779 switch (parts.seg)
7781 case SEG_DEFAULT:
7782 break;
7783 case SEG_FS:
7784 case SEG_GS:
7785 if (USER_LABEL_PREFIX[0] == 0)
7786 putc ('%', file);
7787 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7788 break;
7789 default:
7790 gcc_unreachable ();
7793 if (!base && !index)
7795 /* A displacement-only address requires special attention. */
7797 if (GET_CODE (disp) == CONST_INT)
7799 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7801 if (USER_LABEL_PREFIX[0] == 0)
7802 putc ('%', file);
7803 fputs ("ds:", file);
7805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7807 else if (flag_pic)
7808 output_pic_addr_const (file, disp, 0);
7809 else
7810 output_addr_const (file, disp);
7812 /* Use RIP-relative addressing, which is one byte shorter, in 64-bit mode. */
7813 if (TARGET_64BIT)
7815 if (GET_CODE (disp) == CONST
7816 && GET_CODE (XEXP (disp, 0)) == PLUS
7817 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7818 disp = XEXP (XEXP (disp, 0), 0);
7819 if (GET_CODE (disp) == LABEL_REF
7820 || (GET_CODE (disp) == SYMBOL_REF
7821 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7822 fputs ("(%rip)", file);
7825 else
7827 if (ASSEMBLER_DIALECT == ASM_ATT)
7829 if (disp)
7831 if (flag_pic)
7832 output_pic_addr_const (file, disp, 0);
7833 else if (GET_CODE (disp) == LABEL_REF)
7834 output_asm_label (disp);
7835 else
7836 output_addr_const (file, disp);
7839 putc ('(', file);
7840 if (base)
7841 print_reg (base, 0, file);
7842 if (index)
7844 putc (',', file);
7845 print_reg (index, 0, file);
7846 if (scale != 1)
7847 fprintf (file, ",%d", scale);
7849 putc (')', file);
7851 else
7853 rtx offset = NULL_RTX;
7855 if (disp)
7857 /* Pull out the offset of a symbol; print any symbol itself. */
7858 if (GET_CODE (disp) == CONST
7859 && GET_CODE (XEXP (disp, 0)) == PLUS
7860 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7862 offset = XEXP (XEXP (disp, 0), 1);
7863 disp = gen_rtx_CONST (VOIDmode,
7864 XEXP (XEXP (disp, 0), 0));
7867 if (flag_pic)
7868 output_pic_addr_const (file, disp, 0);
7869 else if (GET_CODE (disp) == LABEL_REF)
7870 output_asm_label (disp);
7871 else if (GET_CODE (disp) == CONST_INT)
7872 offset = disp;
7873 else
7874 output_addr_const (file, disp);
7877 putc ('[', file);
7878 if (base)
7880 print_reg (base, 0, file);
7881 if (offset)
7883 if (INTVAL (offset) >= 0)
7884 putc ('+', file);
7885 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7888 else if (offset)
7889 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7890 else
7891 putc ('0', file);
7893 if (index)
7895 putc ('+', file);
7896 print_reg (index, 0, file);
7897 if (scale != 1)
7898 fprintf (file, "*%d", scale);
7900 putc (']', file);
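/* For example, an address with base %ebx, index %esi, scale 4 and
   displacement 8 comes out as "8(%ebx,%esi,4)" in AT&T syntax and roughly
   as "[ebx+8+esi*4]" in Intel syntax.  */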
7905 bool
7906 output_addr_const_extra (FILE *file, rtx x)
7908 rtx op;
7910 if (GET_CODE (x) != UNSPEC)
7911 return false;
7913 op = XVECEXP (x, 0, 0);
7914 switch (XINT (x, 1))
7916 case UNSPEC_GOTTPOFF:
7917 output_addr_const (file, op);
7918 /* FIXME: This might be @TPOFF in Sun ld. */
7919 fputs ("@GOTTPOFF", file);
7920 break;
7921 case UNSPEC_TPOFF:
7922 output_addr_const (file, op);
7923 fputs ("@TPOFF", file);
7924 break;
7925 case UNSPEC_NTPOFF:
7926 output_addr_const (file, op);
7927 if (TARGET_64BIT)
7928 fputs ("@TPOFF", file);
7929 else
7930 fputs ("@NTPOFF", file);
7931 break;
7932 case UNSPEC_DTPOFF:
7933 output_addr_const (file, op);
7934 fputs ("@DTPOFF", file);
7935 break;
7936 case UNSPEC_GOTNTPOFF:
7937 output_addr_const (file, op);
7938 if (TARGET_64BIT)
7939 fputs ("@GOTTPOFF(%rip)", file);
7940 else
7941 fputs ("@GOTNTPOFF", file);
7942 break;
7943 case UNSPEC_INDNTPOFF:
7944 output_addr_const (file, op);
7945 fputs ("@INDNTPOFF", file);
7946 break;
7948 default:
7949 return false;
7952 return true;
7955 /* Split one or more DImode RTL references into pairs of SImode
7956 references. The RTL can be REG, offsettable MEM, integer constant, or
7957 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7958 split and "num" is its length. lo_half and hi_half are output arrays
7959 that parallel "operands". */
7961 void
7962 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7964 while (num--)
7966 rtx op = operands[num];
7968 /* simplify_subreg refuses to split volatile memory addresses,
7969 but we still have to handle them. */
7970 if (GET_CODE (op) == MEM)
7972 lo_half[num] = adjust_address (op, SImode, 0);
7973 hi_half[num] = adjust_address (op, SImode, 4);
7975 else
7977 lo_half[num] = simplify_gen_subreg (SImode, op,
7978 GET_MODE (op) == VOIDmode
7979 ? DImode : GET_MODE (op), 0);
7980 hi_half[num] = simplify_gen_subreg (SImode, op,
7981 GET_MODE (op) == VOIDmode
7982 ? DImode : GET_MODE (op), 4);
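/* E.g. a DImode register or an offsettable DImode MEM is split into SImode
   halves at byte offsets 0 and 4; split_ti below does the same for TImode
   at offsets 0 and 8.  */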
7986 /* Split one or more TImode RTL references into pairs of DImode
7987 references. The RTL can be REG, offsettable MEM, integer constant, or
7988 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7989 split and "num" is its length. lo_half and hi_half are output arrays
7990 that parallel "operands". */
7992 void
7993 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7995 while (num--)
7997 rtx op = operands[num];
7999 /* simplify_subreg refuses to split volatile memory addresses, but we
8000 still have to handle them. */
8001 if (GET_CODE (op) == MEM)
8003 lo_half[num] = adjust_address (op, DImode, 0);
8004 hi_half[num] = adjust_address (op, DImode, 8);
8006 else
8008 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8009 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8014 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8015 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8016 is the expression of the binary operation. The output may either be
8017 emitted here, or returned to the caller, like all output_* functions.
8019 There is no guarantee that the operands are the same mode, as they
8020 might be within FLOAT or FLOAT_EXTEND expressions. */
8022 #ifndef SYSV386_COMPAT
8023 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8024 wants to fix the assemblers because that causes incompatibility
8025 with gcc. No-one wants to fix gcc because that causes
8026 incompatibility with assemblers... You can use the option of
8027 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8028 #define SYSV386_COMPAT 1
8029 #endif
8031 const char *
8032 output_387_binary_op (rtx insn, rtx *operands)
8034 static char buf[30];
8035 const char *p;
8036 const char *ssep;
8037 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8039 #ifdef ENABLE_CHECKING
8040 /* Even if we do not want to check the inputs, this documents the input
8041 constraints, which helps in understanding the following code. */
8042 if (STACK_REG_P (operands[0])
8043 && ((REG_P (operands[1])
8044 && REGNO (operands[0]) == REGNO (operands[1])
8045 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8046 || (REG_P (operands[2])
8047 && REGNO (operands[0]) == REGNO (operands[2])
8048 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8049 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8050 ; /* ok */
8051 else
8052 gcc_assert (is_sse);
8053 #endif
8055 switch (GET_CODE (operands[3]))
8057 case PLUS:
8058 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8059 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8060 p = "fiadd";
8061 else
8062 p = "fadd";
8063 ssep = "add";
8064 break;
8066 case MINUS:
8067 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8068 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8069 p = "fisub";
8070 else
8071 p = "fsub";
8072 ssep = "sub";
8073 break;
8075 case MULT:
8076 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8077 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8078 p = "fimul";
8079 else
8080 p = "fmul";
8081 ssep = "mul";
8082 break;
8084 case DIV:
8085 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8086 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8087 p = "fidiv";
8088 else
8089 p = "fdiv";
8090 ssep = "div";
8091 break;
8093 default:
8094 gcc_unreachable ();
8097 if (is_sse)
8099 strcpy (buf, ssep);
8100 if (GET_MODE (operands[0]) == SFmode)
8101 strcat (buf, "ss\t{%2, %0|%0, %2}");
8102 else
8103 strcat (buf, "sd\t{%2, %0|%0, %2}");
8104 return buf;
8106 strcpy (buf, p);
8108 switch (GET_CODE (operands[3]))
8110 case MULT:
8111 case PLUS:
8112 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8114 rtx temp = operands[2];
8115 operands[2] = operands[1];
8116 operands[1] = temp;
8119 /* We now know operands[0] == operands[1]. */
8121 if (GET_CODE (operands[2]) == MEM)
8123 p = "%z2\t%2";
8124 break;
8127 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8129 if (STACK_TOP_P (operands[0]))
8130 /* How is it that we are storing to a dead operand[2]?
8131 Well, presumably operands[1] is dead too. We can't
8132 store the result to st(0) as st(0) gets popped on this
8133 instruction. Instead store to operands[2] (which I
8134 think has to be st(1)). st(1) will be popped later.
8135 gcc <= 2.8.1 didn't have this check and generated
8136 assembly code that the Unixware assembler rejected. */
8137 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8138 else
8139 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8140 break;
8143 if (STACK_TOP_P (operands[0]))
8144 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8145 else
8146 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8147 break;
8149 case MINUS:
8150 case DIV:
8151 if (GET_CODE (operands[1]) == MEM)
8153 p = "r%z1\t%1";
8154 break;
8157 if (GET_CODE (operands[2]) == MEM)
8159 p = "%z2\t%2";
8160 break;
8163 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8165 #if SYSV386_COMPAT
8166 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8167 derived assemblers, confusingly reverse the direction of
8168 the operation for fsub{r} and fdiv{r} when the
8169 destination register is not st(0). The Intel assembler
8170 doesn't have this brain damage. Read !SYSV386_COMPAT to
8171 figure out what the hardware really does. */
8172 if (STACK_TOP_P (operands[0]))
8173 p = "{p\t%0, %2|rp\t%2, %0}";
8174 else
8175 p = "{rp\t%2, %0|p\t%0, %2}";
8176 #else
8177 if (STACK_TOP_P (operands[0]))
8178 /* As above for fmul/fadd, we can't store to st(0). */
8179 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8180 else
8181 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8182 #endif
8183 break;
8186 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8188 #if SYSV386_COMPAT
8189 if (STACK_TOP_P (operands[0]))
8190 p = "{rp\t%0, %1|p\t%1, %0}";
8191 else
8192 p = "{p\t%1, %0|rp\t%0, %1}";
8193 #else
8194 if (STACK_TOP_P (operands[0]))
8195 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8196 else
8197 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8198 #endif
8199 break;
8202 if (STACK_TOP_P (operands[0]))
8204 if (STACK_TOP_P (operands[1]))
8205 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8206 else
8207 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8208 break;
8210 else if (STACK_TOP_P (operands[1]))
8212 #if SYSV386_COMPAT
8213 p = "{\t%1, %0|r\t%0, %1}";
8214 #else
8215 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8216 #endif
8218 else
8220 #if SYSV386_COMPAT
8221 p = "{r\t%2, %0|\t%0, %2}";
8222 #else
8223 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8224 #endif
8226 break;
8228 default:
8229 gcc_unreachable ();
8232 strcat (buf, p);
8233 return buf;
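/* A feel for the templates built above: st(0) = st(0) + st(1) with neither
   operand dying emits "fadd %st(1), %st" in AT&T syntax, while an SFmode
   SSE add becomes "addss" and a DFmode one "addsd"; the non-commutative
   MINUS/DIV cases additionally select the reversed ("r") forms where the
   operand order requires it.  */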
8236 /* Return needed mode for entity in optimize_mode_switching pass. */
8239 ix86_mode_needed (int entity, rtx insn)
8241 enum attr_i387_cw mode;
8243 /* The mode UNINITIALIZED is used to store the control word after a
8244 function call or ASM pattern. The mode ANY specifies that the function
8245 has no requirements on the control word and makes no changes in the
8246 bits we are interested in. */
8248 if (CALL_P (insn)
8249 || (NONJUMP_INSN_P (insn)
8250 && (asm_noperands (PATTERN (insn)) >= 0
8251 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8252 return I387_CW_UNINITIALIZED;
8254 if (recog_memoized (insn) < 0)
8255 return I387_CW_ANY;
8257 mode = get_attr_i387_cw (insn);
8259 switch (entity)
8261 case I387_TRUNC:
8262 if (mode == I387_CW_TRUNC)
8263 return mode;
8264 break;
8266 case I387_FLOOR:
8267 if (mode == I387_CW_FLOOR)
8268 return mode;
8269 break;
8271 case I387_CEIL:
8272 if (mode == I387_CW_CEIL)
8273 return mode;
8274 break;
8276 case I387_MASK_PM:
8277 if (mode == I387_CW_MASK_PM)
8278 return mode;
8279 break;
8281 default:
8282 gcc_unreachable ();
8285 return I387_CW_ANY;
8288 /* Output code to initialize control word copies used by trunc?f?i and
8289 rounding patterns. CURRENT_MODE is set to the current control word,
8290 while NEW_MODE is set to the new control word. */
8292 void
8293 emit_i387_cw_initialization (int mode)
8295 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8296 rtx new_mode;
8298 int slot;
8300 rtx reg = gen_reg_rtx (HImode);
8302 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8303 emit_move_insn (reg, stored_mode);
8305 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8307 switch (mode)
8309 case I387_CW_TRUNC:
8310 /* round toward zero (truncate) */
8311 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8312 slot = SLOT_CW_TRUNC;
8313 break;
8315 case I387_CW_FLOOR:
8316 /* round down toward -oo */
8317 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8318 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8319 slot = SLOT_CW_FLOOR;
8320 break;
8322 case I387_CW_CEIL:
8323 /* round up toward +oo */
8324 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8325 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8326 slot = SLOT_CW_CEIL;
8327 break;
8329 case I387_CW_MASK_PM:
8330 /* mask precision exception for nearbyint() */
8331 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8332 slot = SLOT_CW_MASK_PM;
8333 break;
8335 default:
8336 gcc_unreachable ();
8339 else
8341 switch (mode)
8343 case I387_CW_TRUNC:
8344 /* round toward zero (truncate) */
8345 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8346 slot = SLOT_CW_TRUNC;
8347 break;
8349 case I387_CW_FLOOR:
8350 /* round down toward -oo */
8351 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8352 slot = SLOT_CW_FLOOR;
8353 break;
8355 case I387_CW_CEIL:
8356 /* round up toward +oo */
8357 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8358 slot = SLOT_CW_CEIL;
8359 break;
8361 case I387_CW_MASK_PM:
8362 /* mask precision exception for nearbyint() */
8363 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8364 slot = SLOT_CW_MASK_PM;
8365 break;
8367 default:
8368 gcc_unreachable ();
8372 gcc_assert (slot < MAX_386_STACK_LOCALS);
8374 new_mode = assign_386_stack_local (HImode, slot);
8375 emit_move_insn (new_mode, reg);
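/* Background for the magic numbers above: the x87 rounding-control field is
   bits 10-11 of the control word, so 0x0c00 selects truncation, 0x0400
   round-down and 0x0800 round-up, and bit 5 (0x0020) is the precision
   exception mask used for nearbyint.  */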
8378 /* Output code for INSN to convert a float to a signed int. OPERANDS
8379 are the insn operands. The output may be [HSD]Imode and the input
8380 operand may be [SDX]Fmode. */
8382 const char *
8383 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8385 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8386 int dimode_p = GET_MODE (operands[0]) == DImode;
8387 int round_mode = get_attr_i387_cw (insn);
8389 /* Jump through a hoop or two for DImode, since the hardware has no
8390 non-popping instruction. We used to do this a different way, but
8391 that was somewhat fragile and broke with post-reload splitters. */
8392 if ((dimode_p || fisttp) && !stack_top_dies)
8393 output_asm_insn ("fld\t%y1", operands);
8395 gcc_assert (STACK_TOP_P (operands[1]));
8396 gcc_assert (GET_CODE (operands[0]) == MEM);
8398 if (fisttp)
8399 output_asm_insn ("fisttp%z0\t%0", operands);
8400 else
8402 if (round_mode != I387_CW_ANY)
8403 output_asm_insn ("fldcw\t%3", operands);
8404 if (stack_top_dies || dimode_p)
8405 output_asm_insn ("fistp%z0\t%0", operands);
8406 else
8407 output_asm_insn ("fist%z0\t%0", operands);
8408 if (round_mode != I387_CW_ANY)
8409 output_asm_insn ("fldcw\t%2", operands);
8412 return "";
8415 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8416 should be used. UNORDERED_P is true when fucom should be used. */
8418 const char *
8419 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8421 int stack_top_dies;
8422 rtx cmp_op0, cmp_op1;
8423 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8425 if (eflags_p)
8427 cmp_op0 = operands[0];
8428 cmp_op1 = operands[1];
8430 else
8432 cmp_op0 = operands[1];
8433 cmp_op1 = operands[2];
8436 if (is_sse)
8438 if (GET_MODE (operands[0]) == SFmode)
8439 if (unordered_p)
8440 return "ucomiss\t{%1, %0|%0, %1}";
8441 else
8442 return "comiss\t{%1, %0|%0, %1}";
8443 else
8444 if (unordered_p)
8445 return "ucomisd\t{%1, %0|%0, %1}";
8446 else
8447 return "comisd\t{%1, %0|%0, %1}";
8450 gcc_assert (STACK_TOP_P (cmp_op0));
8452 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8454 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8456 if (stack_top_dies)
8458 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8459 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8461 else
8462 return "ftst\n\tfnstsw\t%0";
8465 if (STACK_REG_P (cmp_op1)
8466 && stack_top_dies
8467 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8468 && REGNO (cmp_op1) != FIRST_STACK_REG)
8470 /* If the top of the 387 stack dies, and the other operand
8471 is also a stack register that dies, then this must be an
8472 `fcompp' float compare. */
8474 if (eflags_p)
8476 /* There is no double popping fcomi variant. Fortunately,
8477 eflags is immune from the fstp's cc clobbering. */
8478 if (unordered_p)
8479 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8480 else
8481 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8482 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8484 else
8486 if (unordered_p)
8487 return "fucompp\n\tfnstsw\t%0";
8488 else
8489 return "fcompp\n\tfnstsw\t%0";
8492 else
8494 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8496 static const char * const alt[16] =
8498 "fcom%z2\t%y2\n\tfnstsw\t%0",
8499 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8500 "fucom%z2\t%y2\n\tfnstsw\t%0",
8501 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8503 "ficom%z2\t%y2\n\tfnstsw\t%0",
8504 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8505 NULL,
8506 NULL,
8508 "fcomi\t{%y1, %0|%0, %y1}",
8509 "fcomip\t{%y1, %0|%0, %y1}",
8510 "fucomi\t{%y1, %0|%0, %y1}",
8511 "fucomip\t{%y1, %0|%0, %y1}",
8513 NULL,
8514 NULL,
8515 NULL,
8516 NULL
8519 int mask;
8520 const char *ret;
8522 mask = eflags_p << 3;
8523 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8524 mask |= unordered_p << 1;
8525 mask |= stack_top_dies;
8527 gcc_assert (mask < 16);
8528 ret = alt[mask];
8529 gcc_assert (ret);
8531 return ret;
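/* Decoding the mask above: for example eflags_p = 1, a floating-point
   cmp_op1, unordered_p = 1 and a dying stack top give mask 11, i.e. the
   "fucomip" alternative; mask 0 is the plain "fcom" + fnstsw sequence.  */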
8535 void
8536 ix86_output_addr_vec_elt (FILE *file, int value)
8538 const char *directive = ASM_LONG;
8540 #ifdef ASM_QUAD
8541 if (TARGET_64BIT)
8542 directive = ASM_QUAD;
8543 #else
8544 gcc_assert (!TARGET_64BIT);
8545 #endif
8547 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8550 void
8551 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8553 if (TARGET_64BIT)
8554 fprintf (file, "%s%s%d-%s%d\n",
8555 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8556 else if (HAVE_AS_GOTOFF_IN_DATA)
8557 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8558 #if TARGET_MACHO
8559 else if (TARGET_MACHO)
8561 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8562 machopic_output_function_base_name (file);
8563 fprintf(file, "\n");
8565 #endif
8566 else
8567 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8568 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8571 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8572 for the target. */
8574 void
8575 ix86_expand_clear (rtx dest)
8577 rtx tmp;
8579 /* We play register width games, which are only valid after reload. */
8580 gcc_assert (reload_completed);
8582 /* Avoid HImode and its attendant prefix byte. */
8583 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8584 dest = gen_rtx_REG (SImode, REGNO (dest));
8586 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8588 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8589 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8591 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8592 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8595 emit_insn (tmp);
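/* E.g. clearing %eax normally becomes "xorl %eax, %eax" (with an explicit
   flags clobber in the pattern); only when TARGET_USE_MOV0 is set and we
   are not optimizing for size does it stay a plain "movl $0, %eax".  */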
8598 /* X is an unchanging MEM. If it is a constant pool reference, return
8599 the constant pool rtx, else NULL. */
8602 maybe_get_pool_constant (rtx x)
8604 x = ix86_delegitimize_address (XEXP (x, 0));
8606 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8607 return get_pool_constant (x);
8609 return NULL_RTX;
8612 void
8613 ix86_expand_move (enum machine_mode mode, rtx operands[])
8615 int strict = (reload_in_progress || reload_completed);
8616 rtx op0, op1;
8617 enum tls_model model;
8619 op0 = operands[0];
8620 op1 = operands[1];
8622 if (GET_CODE (op1) == SYMBOL_REF)
8624 model = SYMBOL_REF_TLS_MODEL (op1);
8625 if (model)
8627 op1 = legitimize_tls_address (op1, model, true);
8628 op1 = force_operand (op1, op0);
8629 if (op1 == op0)
8630 return;
8633 else if (GET_CODE (op1) == CONST
8634 && GET_CODE (XEXP (op1, 0)) == PLUS
8635 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8637 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8638 if (model)
8640 rtx addend = XEXP (XEXP (op1, 0), 1);
8641 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8642 op1 = force_operand (op1, NULL);
8643 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8644 op0, 1, OPTAB_DIRECT);
8645 if (op1 == op0)
8646 return;
8650 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8652 #if TARGET_MACHO
8653 if (MACHOPIC_PURE)
8655 rtx temp = ((reload_in_progress
8656 || ((op0 && GET_CODE (op0) == REG)
8657 && mode == Pmode))
8658 ? op0 : gen_reg_rtx (Pmode));
8659 op1 = machopic_indirect_data_reference (op1, temp);
8660 op1 = machopic_legitimize_pic_address (op1, mode,
8661 temp == op1 ? 0 : temp);
8663 else if (MACHOPIC_INDIRECT)
8664 op1 = machopic_indirect_data_reference (op1, 0);
8665 if (op0 == op1)
8666 return;
8667 #else
8668 if (GET_CODE (op0) == MEM)
8669 op1 = force_reg (Pmode, op1);
8670 else
8671 op1 = legitimize_address (op1, op1, Pmode);
8672 #endif /* TARGET_MACHO */
8674 else
8676 if (GET_CODE (op0) == MEM
8677 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8678 || !push_operand (op0, mode))
8679 && GET_CODE (op1) == MEM)
8680 op1 = force_reg (mode, op1);
8682 if (push_operand (op0, mode)
8683 && ! general_no_elim_operand (op1, mode))
8684 op1 = copy_to_mode_reg (mode, op1);
8686 /* Force large constants in 64-bit compilation into a register
8687 to get them CSEed. */
8688 if (TARGET_64BIT && mode == DImode
8689 && immediate_operand (op1, mode)
8690 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8691 && !register_operand (op0, mode)
8692 && optimize && !reload_completed && !reload_in_progress)
8693 op1 = copy_to_mode_reg (mode, op1);
8695 if (FLOAT_MODE_P (mode))
8697 /* If we are loading a floating point constant to a register,
8698 force the value to memory now, since we'll get better code
8699 out of the back end. */
8701 if (strict)
8703 else if (GET_CODE (op1) == CONST_DOUBLE)
8705 op1 = validize_mem (force_const_mem (mode, op1));
8706 if (!register_operand (op0, mode))
8708 rtx temp = gen_reg_rtx (mode);
8709 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8710 emit_move_insn (op0, temp);
8711 return;
8717 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8720 void
8721 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8723 rtx op0 = operands[0], op1 = operands[1];
8725 /* Force constants other than zero into memory. We do not know how
8726 the instructions used to build constants modify the upper 64 bits
8727 of the register; once we have that information we may be able
8728 to handle some of them more efficiently. */
8729 if ((reload_in_progress | reload_completed) == 0
8730 && register_operand (op0, mode)
8731 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8732 op1 = validize_mem (force_const_mem (mode, op1));
8734 /* Make operand1 a register if it isn't already. */
8735 if (!no_new_pseudos
8736 && !register_operand (op0, mode)
8737 && !register_operand (op1, mode))
8739 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8740 return;
8743 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8746 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8747 straight to ix86_expand_vector_move. */
8749 void
8750 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8752 rtx op0, op1, m;
8754 op0 = operands[0];
8755 op1 = operands[1];
8757 if (MEM_P (op1))
8759 /* If we're optimizing for size, movups is the smallest. */
8760 if (optimize_size)
8762 op0 = gen_lowpart (V4SFmode, op0);
8763 op1 = gen_lowpart (V4SFmode, op1);
8764 emit_insn (gen_sse_movups (op0, op1));
8765 return;
8768 /* ??? If we have typed data, then it would appear that using
8769 movdqu is the only way to get unaligned data loaded with
8770 integer type. */
8771 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8773 op0 = gen_lowpart (V16QImode, op0);
8774 op1 = gen_lowpart (V16QImode, op1);
8775 emit_insn (gen_sse2_movdqu (op0, op1));
8776 return;
8779 if (TARGET_SSE2 && mode == V2DFmode)
8781 rtx zero;
8783 /* When SSE registers are split into halves, we can avoid
8784 writing to the top half twice. */
8785 if (TARGET_SSE_SPLIT_REGS)
8787 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8788 zero = op0;
8790 else
8792 /* ??? Not sure about the best option for the Intel chips.
8793 The following would seem to suffice; the register is
8794 entirely cleared, breaking the dependency chain. We
8795 then store to the upper half, with a dependency depth
8796 of one. A rumor has it that Intel recommends two movsd
8797 followed by an unpacklpd, but this is unconfirmed. And
8798 given that the dependency depth of the unpacklpd would
8799 still be one, I'm not sure why this would be better. */
8800 zero = CONST0_RTX (V2DFmode);
8803 m = adjust_address (op1, DFmode, 0);
8804 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8805 m = adjust_address (op1, DFmode, 8);
8806 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8808 else
8810 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8811 emit_move_insn (op0, CONST0_RTX (mode));
8812 else
8813 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8815 if (mode != V4SFmode)
8816 op0 = gen_lowpart (V4SFmode, op0);
8817 m = adjust_address (op1, V2SFmode, 0);
8818 emit_insn (gen_sse_loadlps (op0, op0, m));
8819 m = adjust_address (op1, V2SFmode, 8);
8820 emit_insn (gen_sse_loadhps (op0, op0, m));
8823 else if (MEM_P (op0))
8825 /* If we're optimizing for size, movups is the smallest. */
8826 if (optimize_size)
8828 op0 = gen_lowpart (V4SFmode, op0);
8829 op1 = gen_lowpart (V4SFmode, op1);
8830 emit_insn (gen_sse_movups (op0, op1));
8831 return;
8834 /* ??? Similar to above, only less clear because of quote
8835 typeless stores unquote. */
8836 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8837 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8839 op0 = gen_lowpart (V16QImode, op0);
8840 op1 = gen_lowpart (V16QImode, op1);
8841 emit_insn (gen_sse2_movdqu (op0, op1));
8842 return;
8845 if (TARGET_SSE2 && mode == V2DFmode)
8847 m = adjust_address (op0, DFmode, 0);
8848 emit_insn (gen_sse2_storelpd (m, op1));
8849 m = adjust_address (op0, DFmode, 8);
8850 emit_insn (gen_sse2_storehpd (m, op1));
8852 else
8854 if (mode != V4SFmode)
8855 op1 = gen_lowpart (V4SFmode, op1);
8856 m = adjust_address (op0, V2SFmode, 0);
8857 emit_insn (gen_sse_storelps (m, op1));
8858 m = adjust_address (op0, V2SFmode, 8);
8859 emit_insn (gen_sse_storehps (m, op1));
8862 else
8863 gcc_unreachable ();
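/* In short: when optimizing for size everything goes through movups;
   SSE2 integer vectors use movdqu; unaligned V2DF is handled via the
   sse2_loadlpd/loadhpd (resp. storelpd/storehpd) half accesses; the
   remaining cases go through the V4SFmode loadlps/loadhps pair.  */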
8866 /* Expand a push in MODE. This is some mode for which we do not support
8867 proper push instructions, at least from the registers that we expect
8868 the value to live in. */
8870 void
8871 ix86_expand_push (enum machine_mode mode, rtx x)
8873 rtx tmp;
8875 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8876 GEN_INT (-GET_MODE_SIZE (mode)),
8877 stack_pointer_rtx, 1, OPTAB_DIRECT);
8878 if (tmp != stack_pointer_rtx)
8879 emit_move_insn (stack_pointer_rtx, tmp);
8881 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8882 emit_move_insn (tmp, x);
8885 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8886 destination to use for the operation. If different from the true
8887 destination in operands[0], a copy operation will be required. */
8890 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8891 rtx operands[])
8893 int matching_memory;
8894 rtx src1, src2, dst;
8896 dst = operands[0];
8897 src1 = operands[1];
8898 src2 = operands[2];
8900 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8901 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8902 && (rtx_equal_p (dst, src2)
8903 || immediate_operand (src1, mode)))
8905 rtx temp = src1;
8906 src1 = src2;
8907 src2 = temp;
8910 /* If the destination is memory, and we do not have matching source
8911 operands, do things in registers. */
8912 matching_memory = 0;
8913 if (GET_CODE (dst) == MEM)
8915 if (rtx_equal_p (dst, src1))
8916 matching_memory = 1;
8917 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8918 && rtx_equal_p (dst, src2))
8919 matching_memory = 2;
8920 else
8921 dst = gen_reg_rtx (mode);
8924 /* Both source operands cannot be in memory. */
8925 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8927 if (matching_memory != 2)
8928 src2 = force_reg (mode, src2);
8929 else
8930 src1 = force_reg (mode, src1);
8933 /* If the operation is not commutative, source 1 cannot be a constant
8934 or non-matching memory. */
8935 if ((CONSTANT_P (src1)
8936 || (!matching_memory && GET_CODE (src1) == MEM))
8937 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8938 src1 = force_reg (mode, src1);
8940 src1 = operands[1] = src1;
8941 src2 = operands[2] = src2;
8942 return dst;
8945 /* Similarly, but assume that the destination has already been
8946 set up properly. */
8948 void
8949 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8950 enum machine_mode mode, rtx operands[])
8952 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8953 gcc_assert (dst == operands[0]);
8956 /* Attempt to expand a binary operator. Make the expansion closer to the
8957 actual machine than just general_operand, which would allow 3 separate
8958 memory references (one output, two inputs) in a single insn. */
8960 void
8961 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8962 rtx operands[])
8964 rtx src1, src2, dst, op, clob;
8966 dst = ix86_fixup_binary_operands (code, mode, operands);
8967 src1 = operands[1];
8968 src2 = operands[2];
8970 /* Emit the instruction. */
8972 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8973 if (reload_in_progress)
8975 /* Reload doesn't know about the flags register, and doesn't know that
8976 it doesn't want to clobber it. We can only do this with PLUS. */
8977 gcc_assert (code == PLUS);
8978 emit_insn (op);
8980 else
8982 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8983 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8986 /* Fix up the destination if needed. */
8987 if (dst != operands[0])
8988 emit_move_insn (operands[0], dst);
8991 /* Return TRUE or FALSE depending on whether the binary operator meets the
8992 appropriate constraints. */
8995 ix86_binary_operator_ok (enum rtx_code code,
8996 enum machine_mode mode ATTRIBUTE_UNUSED,
8997 rtx operands[3])
8999 /* Both source operands cannot be in memory. */
9000 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9001 return 0;
9002 /* If the operation is not commutative, source 1 cannot be a constant. */
9003 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9004 return 0;
9005 /* If the destination is memory, we must have a matching source operand. */
9006 if (GET_CODE (operands[0]) == MEM
9007 && ! (rtx_equal_p (operands[0], operands[1])
9008 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9009 && rtx_equal_p (operands[0], operands[2]))))
9010 return 0;
9011 /* If the operation is not commutative and source 1 is memory, we must
9012 have a matching destination. */
9013 if (GET_CODE (operands[1]) == MEM
9014 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9015 && ! rtx_equal_p (operands[0], operands[1]))
9016 return 0;
9017 return 1;
9020 /* Attempt to expand a unary operator. Make the expansion closer to the
9021 actual machine than just general_operand, which would allow 2 separate
9022 memory references (one output, one input) in a single insn. */
9024 void
9025 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9026 rtx operands[])
9028 int matching_memory;
9029 rtx src, dst, op, clob;
9031 dst = operands[0];
9032 src = operands[1];
9034 /* If the destination is memory, and we do not have matching source
9035 operands, do things in registers. */
9036 matching_memory = 0;
9037 if (MEM_P (dst))
9039 if (rtx_equal_p (dst, src))
9040 matching_memory = 1;
9041 else
9042 dst = gen_reg_rtx (mode);
9045 /* When source operand is memory, destination must match. */
9046 if (MEM_P (src) && !matching_memory)
9047 src = force_reg (mode, src);
9049 /* Emit the instruction. */
9051 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9052 if (reload_in_progress || code == NOT)
9054 /* Reload doesn't know about the flags register, and doesn't know that
9055 it doesn't want to clobber it. */
9056 gcc_assert (code == NOT);
9057 emit_insn (op);
9059 else
9061 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9062 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9065 /* Fix up the destination if needed. */
9066 if (dst != operands[0])
9067 emit_move_insn (operands[0], dst);
9070 /* Return TRUE or FALSE depending on whether the unary operator meets the
9071 appropriate constraints. */
9074 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9075 enum machine_mode mode ATTRIBUTE_UNUSED,
9076 rtx operands[2] ATTRIBUTE_UNUSED)
9078 /* If one of operands is memory, source and destination must match. */
9079 if ((GET_CODE (operands[0]) == MEM
9080 || GET_CODE (operands[1]) == MEM)
9081 && ! rtx_equal_p (operands[0], operands[1]))
9082 return FALSE;
9083 return TRUE;
9086 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9087 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9088 true, then replicate the mask for all elements of the vector register.
9089 If INVERT is true, then create a mask excluding the sign bit. */
9092 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9094 enum machine_mode vec_mode;
9095 HOST_WIDE_INT hi, lo;
9096 int shift = 63;
9097 rtvec v;
9098 rtx mask;
9100 /* Find the sign bit, sign extended to 2*HWI. */
9101 if (mode == SFmode)
9102 lo = 0x80000000, hi = lo < 0;
9103 else if (HOST_BITS_PER_WIDE_INT >= 64)
9104 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9105 else
9106 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9108 if (invert)
9109 lo = ~lo, hi = ~hi;
9111 /* Force this value into the low part of a fp vector constant. */
9112 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9113 mask = gen_lowpart (mode, mask);
9115 if (mode == SFmode)
9117 if (vect)
9118 v = gen_rtvec (4, mask, mask, mask, mask);
9119 else
9120 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9121 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9122 vec_mode = V4SFmode;
9124 else
9126 if (vect)
9127 v = gen_rtvec (2, mask, mask);
9128 else
9129 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9130 vec_mode = V2DFmode;
9133 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
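/* E.g. for DFmode with VECT and INVERT both false, the result is a V2DF
   register holding { -0.0, 0.0 }, i.e. only bit 63 of the low element set;
   INVERT instead yields the complementary 0x7fff...ffff pattern used to
   clear the sign bit.  */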
9136 /* Generate code for floating point ABS or NEG. */
9138 void
9139 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9140 rtx operands[])
9142 rtx mask, set, use, clob, dst, src;
9143 bool matching_memory;
9144 bool use_sse = false;
9145 bool vector_mode = VECTOR_MODE_P (mode);
9146 enum machine_mode elt_mode = mode;
9148 if (vector_mode)
9150 elt_mode = GET_MODE_INNER (mode);
9151 use_sse = true;
9153 else if (TARGET_SSE_MATH)
9154 use_sse = SSE_FLOAT_MODE_P (mode);
9156 /* NEG and ABS performed with SSE use bitwise mask operations.
9157 Create the appropriate mask now. */
9158 if (use_sse)
9159 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9160 else
9162 /* When not using SSE, we don't use the mask, but prefer to keep the
9163 same general form of the insn pattern to reduce duplication when
9164 it comes time to split. */
9165 mask = const0_rtx;
9168 dst = operands[0];
9169 src = operands[1];
9171 /* If the destination is memory, and we don't have matching source
9172 operands, do things in registers. */
9173 matching_memory = false;
9174 if (MEM_P (dst))
9176 if (rtx_equal_p (dst, src))
9177 matching_memory = true;
9178 else
9179 dst = gen_reg_rtx (mode);
9181 if (MEM_P (src) && !matching_memory)
9182 src = force_reg (mode, src);
9184 if (vector_mode)
9186 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9187 set = gen_rtx_SET (VOIDmode, dst, set);
9188 emit_insn (set);
9190 else
9192 set = gen_rtx_fmt_e (code, mode, src);
9193 set = gen_rtx_SET (VOIDmode, dst, set);
9194 use = gen_rtx_USE (VOIDmode, mask);
9195 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9196 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9199 if (dst != operands[0])
9200 emit_move_insn (operands[0], dst);
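/* So with SSE math a DFmode negation is in effect an xorpd with the
   sign-bit mask and fabs an andpd with its complement; without SSE the
   mask operand is just a placeholder kept so the insn pattern has the
   same shape.  */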
9203 /* Expand a copysign operation. Special case operand 0 being a constant. */
9205 void
9206 ix86_expand_copysign (rtx operands[])
9208 enum machine_mode mode, vmode;
9209 rtx dest, op0, op1, mask, nmask;
9211 dest = operands[0];
9212 op0 = operands[1];
9213 op1 = operands[2];
9215 mode = GET_MODE (dest);
9216 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9218 if (GET_CODE (op0) == CONST_DOUBLE)
9220 rtvec v;
9222 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9223 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9225 if (op0 == CONST0_RTX (mode))
9226 op0 = CONST0_RTX (vmode);
9227 else
9229 if (mode == SFmode)
9230 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9231 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9232 else
9233 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9234 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9237 mask = ix86_build_signbit_mask (mode, 0, 0);
9239 if (mode == SFmode)
9240 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9241 else
9242 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9244 else
9246 nmask = ix86_build_signbit_mask (mode, 0, 1);
9247 mask = ix86_build_signbit_mask (mode, 0, 0);
9249 if (mode == SFmode)
9250 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9251 else
9252 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9256 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9257 be a constant, and so has already been expanded into a vector constant. */
9259 void
9260 ix86_split_copysign_const (rtx operands[])
9262 enum machine_mode mode, vmode;
9263 rtx dest, op0, op1, mask, x;
9265 dest = operands[0];
9266 op0 = operands[1];
9267 op1 = operands[2];
9268 mask = operands[3];
9270 mode = GET_MODE (dest);
9271 vmode = GET_MODE (mask);
9273 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9274 x = gen_rtx_AND (vmode, dest, mask);
9275 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9277 if (op0 != CONST0_RTX (vmode))
9279 x = gen_rtx_IOR (vmode, dest, op0);
9280 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9284 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9285 so we have to do two masks. */
9287 void
9288 ix86_split_copysign_var (rtx operands[])
9290 enum machine_mode mode, vmode;
9291 rtx dest, scratch, op0, op1, mask, nmask, x;
9293 dest = operands[0];
9294 scratch = operands[1];
9295 op0 = operands[2];
9296 op1 = operands[3];
9297 nmask = operands[4];
9298 mask = operands[5];
9300 mode = GET_MODE (dest);
9301 vmode = GET_MODE (mask);
9303 if (rtx_equal_p (op0, op1))
9305 /* Shouldn't happen often (it's useless, obviously), but when it does
9306 we'd generate incorrect code if we continue below. */
9307 emit_move_insn (dest, op0);
9308 return;
9311 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9313 gcc_assert (REGNO (op1) == REGNO (scratch));
9315 x = gen_rtx_AND (vmode, scratch, mask);
9316 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9318 dest = mask;
9319 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9320 x = gen_rtx_NOT (vmode, dest);
9321 x = gen_rtx_AND (vmode, x, op0);
9322 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9324 else
9326 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9328 x = gen_rtx_AND (vmode, scratch, mask);
9330 else /* alternative 2,4 */
9332 gcc_assert (REGNO (mask) == REGNO (scratch));
9333 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9334 x = gen_rtx_AND (vmode, scratch, op1);
9336 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9338 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9340 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9341 x = gen_rtx_AND (vmode, dest, nmask);
9343 else /* alternative 3,4 */
9345 gcc_assert (REGNO (nmask) == REGNO (dest));
9346 dest = nmask;
9347 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9348 x = gen_rtx_AND (vmode, dest, op0);
9350 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9353 x = gen_rtx_IOR (vmode, dest, scratch);
9354 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
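/* Both copysign splitters implement, in vector-mode bit operations,
   result = (op0 & ~sign-mask) | (op1 & sign-mask), i.e. the magnitude of
   op0 combined with the sign of op1.  */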
9357 /* Return TRUE or FALSE depending on whether the first SET in INSN
9358 has source and destination with matching CC modes, and that the
9359 CC mode is at least as constrained as REQ_MODE. */
9362 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9364 rtx set;
9365 enum machine_mode set_mode;
9367 set = PATTERN (insn);
9368 if (GET_CODE (set) == PARALLEL)
9369 set = XVECEXP (set, 0, 0);
9370 gcc_assert (GET_CODE (set) == SET);
9371 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9373 set_mode = GET_MODE (SET_DEST (set));
9374 switch (set_mode)
9376 case CCNOmode:
9377 if (req_mode != CCNOmode
9378 && (req_mode != CCmode
9379 || XEXP (SET_SRC (set), 1) != const0_rtx))
9380 return 0;
9381 break;
9382 case CCmode:
9383 if (req_mode == CCGCmode)
9384 return 0;
9385 /* FALLTHRU */
9386 case CCGCmode:
9387 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9388 return 0;
9389 /* FALLTHRU */
9390 case CCGOCmode:
9391 if (req_mode == CCZmode)
9392 return 0;
9393 /* FALLTHRU */
9394 case CCZmode:
9395 break;
9397 default:
9398 gcc_unreachable ();
9401 return (GET_MODE (SET_SRC (set)) == set_mode);
9404 /* Generate insn patterns to do an integer compare of OPERANDS. */
9406 static rtx
9407 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9409 enum machine_mode cmpmode;
9410 rtx tmp, flags;
9412 cmpmode = SELECT_CC_MODE (code, op0, op1);
9413 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9415 /* This is very simple, but making the interface the same as in the
9416 FP case makes the rest of the code easier. */
9417 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9418 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9420 /* Return the test that should be put into the flags user, i.e.
9421 the bcc, scc, or cmov instruction. */
9422 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9425 /* Figure out whether to use ordered or unordered fp comparisons.
9426 Return the appropriate mode to use. */
9428 enum machine_mode
9429 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9431 /* ??? In order to make all comparisons reversible, we do all comparisons
9432 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9433 all forms of trapping and nontrapping comparisons, we can make inequality
9434 comparisons trapping again, since that results in better code when using
9435 FCOM-based compares. */
9436 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9439 enum machine_mode
9440 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9442 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9443 return ix86_fp_compare_mode (code);
9444 switch (code)
9446 /* Only zero flag is needed. */
9447 case EQ: /* ZF=0 */
9448 case NE: /* ZF!=0 */
9449 return CCZmode;
9450 /* Codes needing carry flag. */
9451 case GEU: /* CF=0 */
9452 case GTU: /* CF=0 & ZF=0 */
9453 case LTU: /* CF=1 */
9454 case LEU: /* CF=1 | ZF=1 */
9455 return CCmode;
9456 /* Codes possibly doable only with sign flag when
9457 comparing against zero. */
9458 case GE: /* SF=OF or SF=0 */
9459 case LT: /* SF<>OF or SF=1 */
9460 if (op1 == const0_rtx)
9461 return CCGOCmode;
9462 else
9463 /* For other cases Carry flag is not required. */
9464 return CCGCmode;
9465 /* Codes doable only with the sign flag when comparing
9466 against zero, but there is no jump instruction for it,
9467 so we need to use relational tests against overflow,
9468 which thus needs to be zero. */
9469 case GT: /* ZF=0 & SF=OF */
9470 case LE: /* ZF=1 | SF<>OF */
9471 if (op1 == const0_rtx)
9472 return CCNOmode;
9473 else
9474 return CCGCmode;
9475 /* The strcmp pattern does (use flags), and combine may ask us for the
9476 proper mode. */
9477 case USE:
9478 return CCmode;
9479 default:
9480 gcc_unreachable ();
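/* Added examples (commentary only): with the mapping above,
     (eq a b)              -> CCZmode   (only ZF is inspected),
     (ltu a b)             -> CCmode    (the carry flag is needed),
     (lt a (const_int 0))  -> CCGOCmode (the sign flag alone suffices),
     (lt a b)              -> CCGCmode  (general signed compare).  */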
9484 /* Return the fixed registers used for condition codes. */
9486 static bool
9487 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9489 *p1 = FLAGS_REG;
9490 *p2 = FPSR_REG;
9491 return true;
9494 /* If two condition code modes are compatible, return a condition code
9495 mode which is compatible with both. Otherwise, return
9496 VOIDmode. */
9498 static enum machine_mode
9499 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9501 if (m1 == m2)
9502 return m1;
9504 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9505 return VOIDmode;
9507 if ((m1 == CCGCmode && m2 == CCGOCmode)
9508 || (m1 == CCGOCmode && m2 == CCGCmode))
9509 return CCGCmode;
9511 switch (m1)
9513 default:
9514 gcc_unreachable ();
9516 case CCmode:
9517 case CCGCmode:
9518 case CCGOCmode:
9519 case CCNOmode:
9520 case CCZmode:
9521 switch (m2)
9523 default:
9524 return VOIDmode;
9526 case CCmode:
9527 case CCGCmode:
9528 case CCGOCmode:
9529 case CCNOmode:
9530 case CCZmode:
9531 return CCmode;
9534 case CCFPmode:
9535 case CCFPUmode:
9536 /* These are only compatible with themselves, which we already
9537 checked above. */
9538 return VOIDmode;
9542 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9545 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9547 enum rtx_code swapped_code = swap_condition (code);
9548 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9549 || (ix86_fp_comparison_cost (swapped_code)
9550 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9553 /* Swap, force into registers, or otherwise massage the two operands
9554 to a fp comparison. The operands are updated in place; the new
9555 comparison code is returned. */
9557 static enum rtx_code
9558 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9560 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9561 rtx op0 = *pop0, op1 = *pop1;
9562 enum machine_mode op_mode = GET_MODE (op0);
9563 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9565 /* All of the unordered compare instructions only work on registers.
9566 The same is true of the fcomi compare instructions. The XFmode
9567 compare instructions require registers except when comparing
9568 against zero or when converting operand 1 from fixed point to
9569 floating point. */
9571 if (!is_sse
9572 && (fpcmp_mode == CCFPUmode
9573 || (op_mode == XFmode
9574 && ! (standard_80387_constant_p (op0) == 1
9575 || standard_80387_constant_p (op1) == 1)
9576 && GET_CODE (op1) != FLOAT)
9577 || ix86_use_fcomi_compare (code)))
9579 op0 = force_reg (op_mode, op0);
9580 op1 = force_reg (op_mode, op1);
9582 else
9584 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9585 things around if they appear profitable, otherwise force op0
9586 into a register. */
9588 if (standard_80387_constant_p (op0) == 0
9589 || (GET_CODE (op0) == MEM
9590 && ! (standard_80387_constant_p (op1) == 0
9591 || GET_CODE (op1) == MEM)))
9593 rtx tmp;
9594 tmp = op0, op0 = op1, op1 = tmp;
9595 code = swap_condition (code);
9598 if (GET_CODE (op0) != REG)
9599 op0 = force_reg (op_mode, op0);
9601 if (CONSTANT_P (op1))
9603 int tmp = standard_80387_constant_p (op1);
9604 if (tmp == 0)
9605 op1 = validize_mem (force_const_mem (op_mode, op1));
9606 else if (tmp == 1)
9608 if (TARGET_CMOVE)
9609 op1 = force_reg (op_mode, op1);
9611 else
9612 op1 = force_reg (op_mode, op1);
9616 /* Try to rearrange the comparison to make it cheaper. */
9617 if (ix86_fp_comparison_cost (code)
9618 > ix86_fp_comparison_cost (swap_condition (code))
9619 && (GET_CODE (op1) == REG || !no_new_pseudos))
9621 rtx tmp;
9622 tmp = op0, op0 = op1, op1 = tmp;
9623 code = swap_condition (code);
9624 if (GET_CODE (op0) != REG)
9625 op0 = force_reg (op_mode, op0);
9628 *pop0 = op0;
9629 *pop1 = op1;
9630 return code;
9633 /* Convert comparison codes we use to represent FP comparison to integer
9634 code that will result in proper branch. Return UNKNOWN if no such code
9635 is available. */
9637 enum rtx_code
9638 ix86_fp_compare_code_to_integer (enum rtx_code code)
9640 switch (code)
9642 case GT:
9643 return GTU;
9644 case GE:
9645 return GEU;
9646 case ORDERED:
9647 case UNORDERED:
9648 return code;
9649 break;
9650 case UNEQ:
9651 return EQ;
9652 break;
9653 case UNLT:
9654 return LTU;
9655 break;
9656 case UNLE:
9657 return LEU;
9658 break;
9659 case LTGT:
9660 return NE;
9661 break;
9662 default:
9663 return UNKNOWN;
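/* Added note: the mapping above works because fcomi/fnstsw+sahf leave
   the flags looking like an *unsigned* integer compare (see the flag
   table in ix86_fp_comparison_codes below), so e.g. UNLT is tested with
   the same jb/setb/cmovb forms as LTU.  */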
9667 /* Split comparison code CODE into comparisons we can do using branch
9668 instructions. BYPASS_CODE is the comparison code for a branch that will
9669 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9670 is not required, its value is set to UNKNOWN.
9671 We never require more than two branches. */
9673 void
9674 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9675 enum rtx_code *first_code,
9676 enum rtx_code *second_code)
9678 *first_code = code;
9679 *bypass_code = UNKNOWN;
9680 *second_code = UNKNOWN;
9682 /* The fcomi comparison sets flags as follows:
9684 cmp ZF PF CF
9685 > 0 0 0
9686 < 0 0 1
9687 = 1 0 0
9688 un 1 1 1 */
9690 switch (code)
9692 case GT: /* GTU - CF=0 & ZF=0 */
9693 case GE: /* GEU - CF=0 */
9694 case ORDERED: /* PF=0 */
9695 case UNORDERED: /* PF=1 */
9696 case UNEQ: /* EQ - ZF=1 */
9697 case UNLT: /* LTU - CF=1 */
9698 case UNLE: /* LEU - CF=1 | ZF=1 */
9699 case LTGT: /* EQ - ZF=0 */
9700 break;
9701 case LT: /* LTU - CF=1 - fails on unordered */
9702 *first_code = UNLT;
9703 *bypass_code = UNORDERED;
9704 break;
9705 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9706 *first_code = UNLE;
9707 *bypass_code = UNORDERED;
9708 break;
9709 case EQ: /* EQ - ZF=1 - fails on unordered */
9710 *first_code = UNEQ;
9711 *bypass_code = UNORDERED;
9712 break;
9713 case NE: /* NE - ZF=0 - fails on unordered */
9714 *first_code = LTGT;
9715 *second_code = UNORDERED;
9716 break;
9717 case UNGE: /* GEU - CF=0 - fails on unordered */
9718 *first_code = GE;
9719 *second_code = UNORDERED;
9720 break;
9721 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9722 *first_code = GT;
9723 *second_code = UNORDERED;
9724 break;
9725 default:
9726 gcc_unreachable ();
9728 if (!TARGET_IEEE_FP)
9730 *second_code = UNKNOWN;
9731 *bypass_code = UNKNOWN;
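/* Added worked example: for an IEEE-safe EQ the table above selects
   first_code = UNEQ with bypass_code = UNORDERED, i.e. branch around
   the equality test whenever the operands compare unordered (PF set).
   When TARGET_IEEE_FP is clear the extra branches are dropped again
   just above, leaving a single test.  */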
9735 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9736 All of the following functions use the number of instructions as the cost metric.
9737 In the future this should be tweaked to compute bytes for optimize_size and
9738 take into account the performance of various instructions on various CPUs. */
9739 static int
9740 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9742 if (!TARGET_IEEE_FP)
9743 return 4;
9744 /* The cost of code output by ix86_expand_fp_compare. */
9745 switch (code)
9747 case UNLE:
9748 case UNLT:
9749 case LTGT:
9750 case GT:
9751 case GE:
9752 case UNORDERED:
9753 case ORDERED:
9754 case UNEQ:
9755 return 4;
9756 break;
9757 case LT:
9758 case NE:
9759 case EQ:
9760 case UNGE:
9761 return 5;
9762 break;
9763 case LE:
9764 case UNGT:
9765 return 6;
9766 break;
9767 default:
9768 gcc_unreachable ();
9772 /* Return cost of comparison done using fcomi operation.
9773 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9774 static int
9775 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9777 enum rtx_code bypass_code, first_code, second_code;
9778 /* Return an arbitrarily high cost when the instruction is not supported - this
9779 prevents gcc from using it. */
9780 if (!TARGET_CMOVE)
9781 return 1024;
9782 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9783 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9786 /* Return cost of comparison done using sahf operation.
9787 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9788 static int
9789 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9791 enum rtx_code bypass_code, first_code, second_code;
9792 /* Return an arbitrarily high cost when the instruction is not preferred - this
9793 prevents gcc from using it. */
9794 if (!TARGET_USE_SAHF && !optimize_size)
9795 return 1024;
9796 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9797 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9800 /* Compute cost of the comparison done using any method.
9801 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9802 static int
9803 ix86_fp_comparison_cost (enum rtx_code code)
9805 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9806 int min;
9808 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9809 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9811 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9812 if (min > sahf_cost)
9813 min = sahf_cost;
9814 if (min > fcomi_cost)
9815 min = fcomi_cost;
9816 return min;
9819 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9821 static rtx
9822 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9823 rtx *second_test, rtx *bypass_test)
9825 enum machine_mode fpcmp_mode, intcmp_mode;
9826 rtx tmp, tmp2;
9827 int cost = ix86_fp_comparison_cost (code);
9828 enum rtx_code bypass_code, first_code, second_code;
9830 fpcmp_mode = ix86_fp_compare_mode (code);
9831 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9833 if (second_test)
9834 *second_test = NULL_RTX;
9835 if (bypass_test)
9836 *bypass_test = NULL_RTX;
9838 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9840 /* Do fcomi/sahf based test when profitable. */
9841 if ((bypass_code == UNKNOWN || bypass_test)
9842 && (second_code == UNKNOWN || second_test)
9843 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9845 if (TARGET_CMOVE)
9847 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9848 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9849 tmp);
9850 emit_insn (tmp);
9852 else
9854 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9855 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9856 if (!scratch)
9857 scratch = gen_reg_rtx (HImode);
9858 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9859 emit_insn (gen_x86_sahf_1 (scratch));
9862 /* The FP codes work out to act like unsigned. */
9863 intcmp_mode = fpcmp_mode;
9864 code = first_code;
9865 if (bypass_code != UNKNOWN)
9866 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9867 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9868 const0_rtx);
9869 if (second_code != UNKNOWN)
9870 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9871 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9872 const0_rtx);
9874 else
9876 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9877 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9878 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9879 if (!scratch)
9880 scratch = gen_reg_rtx (HImode);
9881 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9883 /* In the unordered case, we have to check C2 for NaN's, which
9884 doesn't happen to work out to anything nice combination-wise.
9885 So do some bit twiddling on the value we've got in AH to come
9886 up with an appropriate set of condition codes. */
9888 intcmp_mode = CCNOmode;
9889 switch (code)
9891 case GT:
9892 case UNGT:
9893 if (code == GT || !TARGET_IEEE_FP)
9895 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9896 code = EQ;
9898 else
9900 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9901 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9902 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9903 intcmp_mode = CCmode;
9904 code = GEU;
9906 break;
9907 case LT:
9908 case UNLT:
9909 if (code == LT && TARGET_IEEE_FP)
9911 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9912 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9913 intcmp_mode = CCmode;
9914 code = EQ;
9916 else
9918 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9919 code = NE;
9921 break;
9922 case GE:
9923 case UNGE:
9924 if (code == GE || !TARGET_IEEE_FP)
9926 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9927 code = EQ;
9929 else
9931 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9932 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9933 GEN_INT (0x01)));
9934 code = NE;
9936 break;
9937 case LE:
9938 case UNLE:
9939 if (code == LE && TARGET_IEEE_FP)
9941 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9942 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9943 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9944 intcmp_mode = CCmode;
9945 code = LTU;
9947 else
9949 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9950 code = NE;
9952 break;
9953 case EQ:
9954 case UNEQ:
9955 if (code == EQ && TARGET_IEEE_FP)
9957 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9958 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9959 intcmp_mode = CCmode;
9960 code = EQ;
9962 else
9964 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9965 code = NE;
9966 break;
9968 break;
9969 case NE:
9970 case LTGT:
9971 if (code == NE && TARGET_IEEE_FP)
9973 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9974 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9975 GEN_INT (0x40)));
9976 code = NE;
9978 else
9980 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9981 code = EQ;
9983 break;
9985 case UNORDERED:
9986 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9987 code = NE;
9988 break;
9989 case ORDERED:
9990 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9991 code = EQ;
9992 break;
9994 default:
9995 gcc_unreachable ();
9999 /* Return the test that should be put into the flags user, i.e.
10000 the bcc, scc, or cmov instruction. */
10001 return gen_rtx_fmt_ee (code, VOIDmode,
10002 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10003 const0_rtx);
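/* Added sketch of the code shapes produced above (illustrative only):
     fcomi path:  compare into the flags directly, then jcc/setcc/cmov;
     sahf path:   fnstsw into a HImode scratch, sahf, then jcc;
     fallback:    fnstsw into the scratch and test/and/cmp bits of AH,
                  where 0x01/0x04/0x40 are C0/C2/C3 of the FPU status
                  word and 0x45 is their union.  */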
10007 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10009 rtx op0, op1, ret;
10010 op0 = ix86_compare_op0;
10011 op1 = ix86_compare_op1;
10013 if (second_test)
10014 *second_test = NULL_RTX;
10015 if (bypass_test)
10016 *bypass_test = NULL_RTX;
10018 if (ix86_compare_emitted)
10020 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10021 ix86_compare_emitted = NULL_RTX;
10023 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10024 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10025 second_test, bypass_test);
10026 else
10027 ret = ix86_expand_int_compare (code, op0, op1);
10029 return ret;
10032 /* Return true if CODE will result in a nontrivial jump sequence. */
10033 bool
10034 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10036 enum rtx_code bypass_code, first_code, second_code;
10037 if (!TARGET_CMOVE)
10038 return true;
10039 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10040 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10043 void
10044 ix86_expand_branch (enum rtx_code code, rtx label)
10046 rtx tmp;
10048 switch (GET_MODE (ix86_compare_op0))
10050 case QImode:
10051 case HImode:
10052 case SImode:
10053 simple:
10054 tmp = ix86_expand_compare (code, NULL, NULL);
10055 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10056 gen_rtx_LABEL_REF (VOIDmode, label),
10057 pc_rtx);
10058 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10059 return;
10061 case SFmode:
10062 case DFmode:
10063 case XFmode:
10065 rtvec vec;
10066 int use_fcomi;
10067 enum rtx_code bypass_code, first_code, second_code;
10069 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10070 &ix86_compare_op1);
10072 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10074 /* Check whether we will use the natural sequence with one jump. If
10075 so, we can expand the jump early. Otherwise delay expansion by
10076 creating a compound insn so as not to confuse the optimizers. */
10077 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10078 && TARGET_CMOVE)
10080 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10081 gen_rtx_LABEL_REF (VOIDmode, label),
10082 pc_rtx, NULL_RTX, NULL_RTX);
10084 else
10086 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10087 ix86_compare_op0, ix86_compare_op1);
10088 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10089 gen_rtx_LABEL_REF (VOIDmode, label),
10090 pc_rtx);
10091 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10093 use_fcomi = ix86_use_fcomi_compare (code);
10094 vec = rtvec_alloc (3 + !use_fcomi);
10095 RTVEC_ELT (vec, 0) = tmp;
10096 RTVEC_ELT (vec, 1)
10097 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10098 RTVEC_ELT (vec, 2)
10099 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10100 if (! use_fcomi)
10101 RTVEC_ELT (vec, 3)
10102 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10104 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10106 return;
10109 case DImode:
10110 if (TARGET_64BIT)
10111 goto simple;
10112 case TImode:
10113 /* Expand DImode branch into multiple compare+branch. */
10115 rtx lo[2], hi[2], label2;
10116 enum rtx_code code1, code2, code3;
10117 enum machine_mode submode;
10119 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10121 tmp = ix86_compare_op0;
10122 ix86_compare_op0 = ix86_compare_op1;
10123 ix86_compare_op1 = tmp;
10124 code = swap_condition (code);
10126 if (GET_MODE (ix86_compare_op0) == DImode)
10128 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10129 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10130 submode = SImode;
10132 else
10134 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10135 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10136 submode = DImode;
10139 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10140 avoid two branches. This costs one extra insn, so disable when
10141 optimizing for size. */
10143 if ((code == EQ || code == NE)
10144 && (!optimize_size
10145 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10147 rtx xor0, xor1;
10149 xor1 = hi[0];
10150 if (hi[1] != const0_rtx)
10151 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10152 NULL_RTX, 0, OPTAB_WIDEN);
10154 xor0 = lo[0];
10155 if (lo[1] != const0_rtx)
10156 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10157 NULL_RTX, 0, OPTAB_WIDEN);
10159 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10160 NULL_RTX, 0, OPTAB_WIDEN);
10162 ix86_compare_op0 = tmp;
10163 ix86_compare_op1 = const0_rtx;
10164 ix86_expand_branch (code, label);
10165 return;
10168 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10169 op1 is a constant and the low word is zero, then we can just
10170 examine the high word. */
10172 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10173 switch (code)
10175 case LT: case LTU: case GE: case GEU:
10176 ix86_compare_op0 = hi[0];
10177 ix86_compare_op1 = hi[1];
10178 ix86_expand_branch (code, label);
10179 return;
10180 default:
10181 break;
10184 /* Otherwise, we need two or three jumps. */
10186 label2 = gen_label_rtx ();
10188 code1 = code;
10189 code2 = swap_condition (code);
10190 code3 = unsigned_condition (code);
10192 switch (code)
10194 case LT: case GT: case LTU: case GTU:
10195 break;
10197 case LE: code1 = LT; code2 = GT; break;
10198 case GE: code1 = GT; code2 = LT; break;
10199 case LEU: code1 = LTU; code2 = GTU; break;
10200 case GEU: code1 = GTU; code2 = LTU; break;
10202 case EQ: code1 = UNKNOWN; code2 = NE; break;
10203 case NE: code2 = UNKNOWN; break;
10205 default:
10206 gcc_unreachable ();
10210 * a < b =>
10211 * if (hi(a) < hi(b)) goto true;
10212 * if (hi(a) > hi(b)) goto false;
10213 * if (lo(a) < lo(b)) goto true;
10214 * false:
10217 ix86_compare_op0 = hi[0];
10218 ix86_compare_op1 = hi[1];
10220 if (code1 != UNKNOWN)
10221 ix86_expand_branch (code1, label);
10222 if (code2 != UNKNOWN)
10223 ix86_expand_branch (code2, label2);
10225 ix86_compare_op0 = lo[0];
10226 ix86_compare_op1 = lo[1];
10227 ix86_expand_branch (code3, label);
10229 if (code2 != UNKNOWN)
10230 emit_label (label2);
10231 return;
10234 default:
10235 gcc_unreachable ();
10239 /* Split branch based on floating point condition. */
10240 void
10241 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10242 rtx target1, rtx target2, rtx tmp, rtx pushed)
10244 rtx second, bypass;
10245 rtx label = NULL_RTX;
10246 rtx condition;
10247 int bypass_probability = -1, second_probability = -1, probability = -1;
10248 rtx i;
10250 if (target2 != pc_rtx)
10252 rtx tmp = target2;
10253 code = reverse_condition_maybe_unordered (code);
10254 target2 = target1;
10255 target1 = tmp;
10258 condition = ix86_expand_fp_compare (code, op1, op2,
10259 tmp, &second, &bypass);
10261 /* Remove pushed operand from stack. */
10262 if (pushed)
10263 ix86_free_from_memory (GET_MODE (pushed));
10265 if (split_branch_probability >= 0)
10267 /* Distribute the probabilities across the jumps.
10268 Assume that BYPASS and SECOND are always tests
10269 for UNORDERED. */
10270 probability = split_branch_probability;
10272 /* A value of 1 is low enough that there is no need for the probability
10273 to be updated. Later we may run some experiments and see
10274 if unordered values are more frequent in practice. */
10275 if (bypass)
10276 bypass_probability = 1;
10277 if (second)
10278 second_probability = 1;
10280 if (bypass != NULL_RTX)
10282 label = gen_label_rtx ();
10283 i = emit_jump_insn (gen_rtx_SET
10284 (VOIDmode, pc_rtx,
10285 gen_rtx_IF_THEN_ELSE (VOIDmode,
10286 bypass,
10287 gen_rtx_LABEL_REF (VOIDmode,
10288 label),
10289 pc_rtx)));
10290 if (bypass_probability >= 0)
10291 REG_NOTES (i)
10292 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10293 GEN_INT (bypass_probability),
10294 REG_NOTES (i));
10296 i = emit_jump_insn (gen_rtx_SET
10297 (VOIDmode, pc_rtx,
10298 gen_rtx_IF_THEN_ELSE (VOIDmode,
10299 condition, target1, target2)));
10300 if (probability >= 0)
10301 REG_NOTES (i)
10302 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10303 GEN_INT (probability),
10304 REG_NOTES (i));
10305 if (second != NULL_RTX)
10307 i = emit_jump_insn (gen_rtx_SET
10308 (VOIDmode, pc_rtx,
10309 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10310 target2)));
10311 if (second_probability >= 0)
10312 REG_NOTES (i)
10313 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10314 GEN_INT (second_probability),
10315 REG_NOTES (i));
10317 if (label != NULL_RTX)
10318 emit_label (label);
10322 ix86_expand_setcc (enum rtx_code code, rtx dest)
10324 rtx ret, tmp, tmpreg, equiv;
10325 rtx second_test, bypass_test;
10327 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10328 return 0; /* FAIL */
10330 gcc_assert (GET_MODE (dest) == QImode);
10332 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10333 PUT_MODE (ret, QImode);
10335 tmp = dest;
10336 tmpreg = dest;
10338 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10339 if (bypass_test || second_test)
10341 rtx test = second_test;
10342 int bypass = 0;
10343 rtx tmp2 = gen_reg_rtx (QImode);
10344 if (bypass_test)
10346 gcc_assert (!second_test);
10347 test = bypass_test;
10348 bypass = 1;
10349 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10351 PUT_MODE (test, QImode);
10352 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10354 if (bypass)
10355 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10356 else
10357 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10360 /* Attach a REG_EQUAL note describing the comparison result. */
10361 if (ix86_compare_op0 && ix86_compare_op1)
10363 equiv = simplify_gen_relational (code, QImode,
10364 GET_MODE (ix86_compare_op0),
10365 ix86_compare_op0, ix86_compare_op1);
10366 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10369 return 1; /* DONE */
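/* Added example: for an IEEE LT compare ix86_expand_fp_compare returns
   an UNLT test plus an UNORDERED bypass test; the code above reverses
   the bypass into ORDERED and ANDs the two QImode setcc results, so the
   final value is "ordered && less-than".  */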
10372 /* Expand a comparison setting or clearing the carry flag. Return true when
10373 successful and set *POP to the comparison operation. */
10374 static bool
10375 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10377 enum machine_mode mode =
10378 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10380 /* Do not handle DImode compares that go through the special path. Also we
10381 can't deal with FP compares yet, though it would be possible to add that. */
10382 if (mode == (TARGET_64BIT ? TImode : DImode))
10383 return false;
10384 if (FLOAT_MODE_P (mode))
10386 rtx second_test = NULL, bypass_test = NULL;
10387 rtx compare_op, compare_seq;
10389 /* Shortcut: the following common codes never translate into carry flag compares. */
10390 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10391 || code == ORDERED || code == UNORDERED)
10392 return false;
10394 /* These comparisons require zero flag; swap operands so they won't. */
10395 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10396 && !TARGET_IEEE_FP)
10398 rtx tmp = op0;
10399 op0 = op1;
10400 op1 = tmp;
10401 code = swap_condition (code);
10404 /* Try to expand the comparison and verify that we end up with a carry flag
10405 based comparison. This fails to be true only when we decide to expand the
10406 comparison using arithmetic, which is not a common scenario. */
10407 start_sequence ();
10408 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10409 &second_test, &bypass_test);
10410 compare_seq = get_insns ();
10411 end_sequence ();
10413 if (second_test || bypass_test)
10414 return false;
10415 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10416 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10417 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10418 else
10419 code = GET_CODE (compare_op);
10420 if (code != LTU && code != GEU)
10421 return false;
10422 emit_insn (compare_seq);
10423 *pop = compare_op;
10424 return true;
10426 if (!INTEGRAL_MODE_P (mode))
10427 return false;
10428 switch (code)
10430 case LTU:
10431 case GEU:
10432 break;
10434 /* Convert a==0 into (unsigned)a<1. */
10435 case EQ:
10436 case NE:
10437 if (op1 != const0_rtx)
10438 return false;
10439 op1 = const1_rtx;
10440 code = (code == EQ ? LTU : GEU);
10441 break;
10443 /* Convert a>b into b<a or a>=b+1. */
10444 case GTU:
10445 case LEU:
10446 if (GET_CODE (op1) == CONST_INT)
10448 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10449 /* Bail out on overflow. We still can swap operands but that
10450 would force loading of the constant into a register. */
10451 if (op1 == const0_rtx
10452 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10453 return false;
10454 code = (code == GTU ? GEU : LTU);
10456 else
10458 rtx tmp = op1;
10459 op1 = op0;
10460 op0 = tmp;
10461 code = (code == GTU ? LTU : GEU);
10463 break;
10465 /* Convert a>=0 into (unsigned)a<0x80000000. */
10466 case LT:
10467 case GE:
10468 if (mode == DImode || op1 != const0_rtx)
10469 return false;
10470 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10471 code = (code == LT ? GEU : LTU);
10472 break;
10473 case LE:
10474 case GT:
10475 if (mode == DImode || op1 != constm1_rtx)
10476 return false;
10477 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10478 code = (code == LE ? GEU : LTU);
10479 break;
10481 default:
10482 return false;
10484 /* Swapping operands may cause constant to appear as first operand. */
10485 if (!nonimmediate_operand (op0, VOIDmode))
10487 if (no_new_pseudos)
10488 return false;
10489 op0 = force_reg (mode, op0);
10491 ix86_compare_op0 = op0;
10492 ix86_compare_op1 = op1;
10493 *pop = ix86_expand_compare (code, NULL, NULL);
10494 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10495 return true;
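/* Added examples of the integer conversions above:
     a == 0  becomes  (unsigned) a < 1           (EQ  -> LTU),
     a >  5  becomes  (unsigned) a >= 6          (GTU -> GEU),
     a >= 0  becomes  (unsigned) a < 0x80000000  (GE  -> LTU),
   so the whole condition collapses into a single carry-flag test that
   feeds the sbb/adc based sequences in the callers below.  */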
10499 ix86_expand_int_movcc (rtx operands[])
10501 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10502 rtx compare_seq, compare_op;
10503 rtx second_test, bypass_test;
10504 enum machine_mode mode = GET_MODE (operands[0]);
10505 bool sign_bit_compare_p = false;
10507 start_sequence ();
10508 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10509 compare_seq = get_insns ();
10510 end_sequence ();
10512 compare_code = GET_CODE (compare_op);
10514 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10515 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10516 sign_bit_compare_p = true;
10518 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10519 HImode insns, we'd be swallowed in word prefix ops. */
10521 if ((mode != HImode || TARGET_FAST_PREFIX)
10522 && (mode != (TARGET_64BIT ? TImode : DImode))
10523 && GET_CODE (operands[2]) == CONST_INT
10524 && GET_CODE (operands[3]) == CONST_INT)
10526 rtx out = operands[0];
10527 HOST_WIDE_INT ct = INTVAL (operands[2]);
10528 HOST_WIDE_INT cf = INTVAL (operands[3]);
10529 HOST_WIDE_INT diff;
10531 diff = ct - cf;
10532 /* Sign bit compares are better done using shifts than by using
10533 sbb. */
10534 if (sign_bit_compare_p
10535 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10536 ix86_compare_op1, &compare_op))
10538 /* Detect overlap between destination and compare sources. */
10539 rtx tmp = out;
10541 if (!sign_bit_compare_p)
10543 bool fpcmp = false;
10545 compare_code = GET_CODE (compare_op);
10547 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10548 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10550 fpcmp = true;
10551 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10554 /* To simplify rest of code, restrict to the GEU case. */
10555 if (compare_code == LTU)
10557 HOST_WIDE_INT tmp = ct;
10558 ct = cf;
10559 cf = tmp;
10560 compare_code = reverse_condition (compare_code);
10561 code = reverse_condition (code);
10563 else
10565 if (fpcmp)
10566 PUT_CODE (compare_op,
10567 reverse_condition_maybe_unordered
10568 (GET_CODE (compare_op)));
10569 else
10570 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10572 diff = ct - cf;
10574 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10575 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10576 tmp = gen_reg_rtx (mode);
10578 if (mode == DImode)
10579 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10580 else
10581 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10583 else
10585 if (code == GT || code == GE)
10586 code = reverse_condition (code);
10587 else
10589 HOST_WIDE_INT tmp = ct;
10590 ct = cf;
10591 cf = tmp;
10592 diff = ct - cf;
10594 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10595 ix86_compare_op1, VOIDmode, 0, -1);
10598 if (diff == 1)
10601 * cmpl op0,op1
10602 * sbbl dest,dest
10603 * [addl dest, ct]
10605 * Size 5 - 8.
10607 if (ct)
10608 tmp = expand_simple_binop (mode, PLUS,
10609 tmp, GEN_INT (ct),
10610 copy_rtx (tmp), 1, OPTAB_DIRECT);
10612 else if (cf == -1)
10615 * cmpl op0,op1
10616 * sbbl dest,dest
10617 * orl $ct, dest
10619 * Size 8.
10621 tmp = expand_simple_binop (mode, IOR,
10622 tmp, GEN_INT (ct),
10623 copy_rtx (tmp), 1, OPTAB_DIRECT);
10625 else if (diff == -1 && ct)
10628 * cmpl op0,op1
10629 * sbbl dest,dest
10630 * notl dest
10631 * [addl dest, cf]
10633 * Size 8 - 11.
10635 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10636 if (cf)
10637 tmp = expand_simple_binop (mode, PLUS,
10638 copy_rtx (tmp), GEN_INT (cf),
10639 copy_rtx (tmp), 1, OPTAB_DIRECT);
10641 else
10644 * cmpl op0,op1
10645 * sbbl dest,dest
10646 * [notl dest]
10647 * andl cf - ct, dest
10648 * [addl dest, ct]
10650 * Size 8 - 11.
10653 if (cf == 0)
10655 cf = ct;
10656 ct = 0;
10657 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10660 tmp = expand_simple_binop (mode, AND,
10661 copy_rtx (tmp),
10662 gen_int_mode (cf - ct, mode),
10663 copy_rtx (tmp), 1, OPTAB_DIRECT);
10664 if (ct)
10665 tmp = expand_simple_binop (mode, PLUS,
10666 copy_rtx (tmp), GEN_INT (ct),
10667 copy_rtx (tmp), 1, OPTAB_DIRECT);
10670 if (!rtx_equal_p (tmp, out))
10671 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10673 return 1; /* DONE */
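/* Added example for the general sbb sequence above: with ct = 5 and
   cf = 2 (after the LTU -> GEU normalization) the emitted code is
   roughly
       cmpl  op1, op0
       sbbl  %eax, %eax        ; 0 or -1 from the carry flag
       andl  $-3, %eax         ; cf - ct
       addl  $5, %eax          ; + ct  ->  2 or 5
   Register names and exact constants are illustrative only.  */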
10676 if (diff < 0)
10678 HOST_WIDE_INT tmp;
10679 tmp = ct, ct = cf, cf = tmp;
10680 diff = -diff;
10681 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10683 /* We may be reversing an unordered compare to a normal compare, which
10684 is not valid in general (we may convert a non-trapping condition
10685 to a trapping one); however, on i386 we currently emit all
10686 comparisons unordered. */
10687 compare_code = reverse_condition_maybe_unordered (compare_code);
10688 code = reverse_condition_maybe_unordered (code);
10690 else
10692 compare_code = reverse_condition (compare_code);
10693 code = reverse_condition (code);
10697 compare_code = UNKNOWN;
10698 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10699 && GET_CODE (ix86_compare_op1) == CONST_INT)
10701 if (ix86_compare_op1 == const0_rtx
10702 && (code == LT || code == GE))
10703 compare_code = code;
10704 else if (ix86_compare_op1 == constm1_rtx)
10706 if (code == LE)
10707 compare_code = LT;
10708 else if (code == GT)
10709 compare_code = GE;
10713 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10714 if (compare_code != UNKNOWN
10715 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10716 && (cf == -1 || ct == -1))
10718 /* If lea code below could be used, only optimize
10719 if it results in a 2 insn sequence. */
10721 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10722 || diff == 3 || diff == 5 || diff == 9)
10723 || (compare_code == LT && ct == -1)
10724 || (compare_code == GE && cf == -1))
10727 * notl op1 (if necessary)
10728 * sarl $31, op1
10729 * orl cf, op1
10731 if (ct != -1)
10733 cf = ct;
10734 ct = -1;
10735 code = reverse_condition (code);
10738 out = emit_store_flag (out, code, ix86_compare_op0,
10739 ix86_compare_op1, VOIDmode, 0, -1);
10741 out = expand_simple_binop (mode, IOR,
10742 out, GEN_INT (cf),
10743 out, 1, OPTAB_DIRECT);
10744 if (out != operands[0])
10745 emit_move_insn (operands[0], out);
10747 return 1; /* DONE */
10752 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10753 || diff == 3 || diff == 5 || diff == 9)
10754 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10755 && (mode != DImode
10756 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10759 * xorl dest,dest
10760 * cmpl op1,op2
10761 * setcc dest
10762 * lea cf(dest*(ct-cf)),dest
10764 * Size 14.
10766 * This also catches the degenerate setcc-only case.
10769 rtx tmp;
10770 int nops;
10772 out = emit_store_flag (out, code, ix86_compare_op0,
10773 ix86_compare_op1, VOIDmode, 0, 1);
10775 nops = 0;
10776 /* On x86_64 the lea instruction operates on Pmode, so we need
10777 to get the arithmetic done in the proper mode to match. */
10778 if (diff == 1)
10779 tmp = copy_rtx (out);
10780 else
10782 rtx out1;
10783 out1 = copy_rtx (out);
10784 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10785 nops++;
10786 if (diff & 1)
10788 tmp = gen_rtx_PLUS (mode, tmp, out1);
10789 nops++;
10792 if (cf != 0)
10794 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10795 nops++;
10797 if (!rtx_equal_p (tmp, out))
10799 if (nops == 1)
10800 out = force_operand (tmp, copy_rtx (out));
10801 else
10802 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10804 if (!rtx_equal_p (out, operands[0]))
10805 emit_move_insn (operands[0], copy_rtx (out));
10807 return 1; /* DONE */
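/* Added example for the setcc + lea path above: with ct = 5, cf = 1
   (diff = 4) the sequence is roughly
       xorl  %eax, %eax
       cmpl  op1, op0
       setcc %al
       leal  1(,%eax,4), %eax   ; 1 when false, 5 when true
   again purely as an illustration of the MULT/PLUS rtx built above.  */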
10811 * General case: Jumpful:
10812 * xorl dest,dest cmpl op1, op2
10813 * cmpl op1, op2 movl ct, dest
10814 * setcc dest jcc 1f
10815 * decl dest movl cf, dest
10816 * andl (cf-ct),dest 1:
10817 * addl ct,dest
10819 * Size 20. Size 14.
10821 * This is reasonably steep, but branch mispredict costs are
10822 * high on modern cpus, so consider failing only if optimizing
10823 * for space.
10826 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10827 && BRANCH_COST >= 2)
10829 if (cf == 0)
10831 cf = ct;
10832 ct = 0;
10833 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10834 /* We may be reversing an unordered compare to a normal compare,
10835 which is not valid in general (we may convert a non-trapping
10836 condition to a trapping one); however, on i386 we currently
10837 emit all comparisons unordered. */
10838 code = reverse_condition_maybe_unordered (code);
10839 else
10841 code = reverse_condition (code);
10842 if (compare_code != UNKNOWN)
10843 compare_code = reverse_condition (compare_code);
10847 if (compare_code != UNKNOWN)
10849 /* notl op1 (if needed)
10850 sarl $31, op1
10851 andl (cf-ct), op1
10852 addl ct, op1
10854 For x < 0 (resp. x <= -1) there will be no notl,
10855 so if possible swap the constants to get rid of the
10856 complement.
10857 True/false will be -1/0 while code below (store flag
10858 followed by decrement) is 0/-1, so the constants need
10859 to be exchanged once more. */
10861 if (compare_code == GE || !cf)
10863 code = reverse_condition (code);
10864 compare_code = LT;
10866 else
10868 HOST_WIDE_INT tmp = cf;
10869 cf = ct;
10870 ct = tmp;
10873 out = emit_store_flag (out, code, ix86_compare_op0,
10874 ix86_compare_op1, VOIDmode, 0, -1);
10876 else
10878 out = emit_store_flag (out, code, ix86_compare_op0,
10879 ix86_compare_op1, VOIDmode, 0, 1);
10881 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10882 copy_rtx (out), 1, OPTAB_DIRECT);
10885 out = expand_simple_binop (mode, AND, copy_rtx (out),
10886 gen_int_mode (cf - ct, mode),
10887 copy_rtx (out), 1, OPTAB_DIRECT);
10888 if (ct)
10889 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10890 copy_rtx (out), 1, OPTAB_DIRECT);
10891 if (!rtx_equal_p (out, operands[0]))
10892 emit_move_insn (operands[0], copy_rtx (out));
10894 return 1; /* DONE */
10898 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10900 /* Try a few things more with specific constants and a variable. */
10902 optab op;
10903 rtx var, orig_out, out, tmp;
10905 if (BRANCH_COST <= 2)
10906 return 0; /* FAIL */
10908 /* If one of the two operands is an interesting constant, load a
10909 constant with the above and mask it in with a logical operation. */
10911 if (GET_CODE (operands[2]) == CONST_INT)
10913 var = operands[3];
10914 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10915 operands[3] = constm1_rtx, op = and_optab;
10916 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10917 operands[3] = const0_rtx, op = ior_optab;
10918 else
10919 return 0; /* FAIL */
10921 else if (GET_CODE (operands[3]) == CONST_INT)
10923 var = operands[2];
10924 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10925 operands[2] = constm1_rtx, op = and_optab;
10926 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10927 operands[2] = const0_rtx, op = ior_optab;
10928 else
10929 return 0; /* FAIL */
10931 else
10932 return 0; /* FAIL */
10934 orig_out = operands[0];
10935 tmp = gen_reg_rtx (mode);
10936 operands[0] = tmp;
10938 /* Recurse to get the constant loaded. */
10939 if (ix86_expand_int_movcc (operands) == 0)
10940 return 0; /* FAIL */
10942 /* Mask in the interesting variable. */
10943 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10944 OPTAB_WIDEN);
10945 if (!rtx_equal_p (out, orig_out))
10946 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10948 return 1; /* DONE */
10952 * For comparison with above,
10954 * movl cf,dest
10955 * movl ct,tmp
10956 * cmpl op1,op2
10957 * cmovcc tmp,dest
10959 * Size 15.
10962 if (! nonimmediate_operand (operands[2], mode))
10963 operands[2] = force_reg (mode, operands[2]);
10964 if (! nonimmediate_operand (operands[3], mode))
10965 operands[3] = force_reg (mode, operands[3]);
10967 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10969 rtx tmp = gen_reg_rtx (mode);
10970 emit_move_insn (tmp, operands[3]);
10971 operands[3] = tmp;
10973 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10975 rtx tmp = gen_reg_rtx (mode);
10976 emit_move_insn (tmp, operands[2]);
10977 operands[2] = tmp;
10980 if (! register_operand (operands[2], VOIDmode)
10981 && (mode == QImode
10982 || ! register_operand (operands[3], VOIDmode)))
10983 operands[2] = force_reg (mode, operands[2]);
10985 if (mode == QImode
10986 && ! register_operand (operands[3], VOIDmode))
10987 operands[3] = force_reg (mode, operands[3]);
10989 emit_insn (compare_seq);
10990 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10991 gen_rtx_IF_THEN_ELSE (mode,
10992 compare_op, operands[2],
10993 operands[3])));
10994 if (bypass_test)
10995 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10996 gen_rtx_IF_THEN_ELSE (mode,
10997 bypass_test,
10998 copy_rtx (operands[3]),
10999 copy_rtx (operands[0]))));
11000 if (second_test)
11001 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11002 gen_rtx_IF_THEN_ELSE (mode,
11003 second_test,
11004 copy_rtx (operands[2]),
11005 copy_rtx (operands[0]))));
11007 return 1; /* DONE */
11010 /* Swap, force into registers, or otherwise massage the two operands
11011 to an sse comparison with a mask result. Thus we differ a bit from
11012 ix86_prepare_fp_compare_args which expects to produce a flags result.
11014 The DEST operand exists to help determine whether to commute commutative
11015 operators. The POP0/POP1 operands are updated in place. The new
11016 comparison code is returned, or UNKNOWN if not implementable. */
11018 static enum rtx_code
11019 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11020 rtx *pop0, rtx *pop1)
11022 rtx tmp;
11024 switch (code)
11026 case LTGT:
11027 case UNEQ:
11028 /* We have no LTGT as an operator. We could implement it with
11029 NE & ORDERED, but this requires an extra temporary. It's
11030 not clear that it's worth it. */
11031 return UNKNOWN;
11033 case LT:
11034 case LE:
11035 case UNGT:
11036 case UNGE:
11037 /* These are supported directly. */
11038 break;
11040 case EQ:
11041 case NE:
11042 case UNORDERED:
11043 case ORDERED:
11044 /* For commutative operators, try to canonicalize the destination
11045 operand to be first in the comparison - this helps reload to
11046 avoid extra moves. */
11047 if (!dest || !rtx_equal_p (dest, *pop1))
11048 break;
11049 /* FALLTHRU */
11051 case GE:
11052 case GT:
11053 case UNLE:
11054 case UNLT:
11055 /* These are not supported directly. Swap the comparison operands
11056 to transform into something that is supported. */
11057 tmp = *pop0;
11058 *pop0 = *pop1;
11059 *pop1 = tmp;
11060 code = swap_condition (code);
11061 break;
11063 default:
11064 gcc_unreachable ();
11067 return code;
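/* Added note: e.g. GT and GE are rewritten as LT and LE with the
   operands swapped, since the SSE compare encodings only provide the
   eq/lt/le/unord family directly; EQ and friends are merely commuted
   when that lets the destination double as the first source, saving
   reload a move.  */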
11070 /* Detect conditional moves that exactly match min/max operational
11071 semantics. Note that this is IEEE safe, as long as we don't
11072 interchange the operands.
11074 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11075 and TRUE if the operation is successful and instructions are emitted. */
11077 static bool
11078 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11079 rtx cmp_op1, rtx if_true, rtx if_false)
11081 enum machine_mode mode;
11082 bool is_min;
11083 rtx tmp;
11085 if (code == LT)
11087 else if (code == UNGE)
11089 tmp = if_true;
11090 if_true = if_false;
11091 if_false = tmp;
11093 else
11094 return false;
11096 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11097 is_min = true;
11098 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11099 is_min = false;
11100 else
11101 return false;
11103 mode = GET_MODE (dest);
11105 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11106 but MODE may be a vector mode and thus not appropriate. */
11107 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11109 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11110 rtvec v;
11112 if_true = force_reg (mode, if_true);
11113 v = gen_rtvec (2, if_true, if_false);
11114 tmp = gen_rtx_UNSPEC (mode, v, u);
11116 else
11118 code = is_min ? SMIN : SMAX;
11119 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11122 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11123 return true;
11126 /* Expand an sse vector comparison. Return the register with the result. */
11128 static rtx
11129 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11130 rtx op_true, rtx op_false)
11132 enum machine_mode mode = GET_MODE (dest);
11133 rtx x;
11135 cmp_op0 = force_reg (mode, cmp_op0);
11136 if (!nonimmediate_operand (cmp_op1, mode))
11137 cmp_op1 = force_reg (mode, cmp_op1);
11139 if (optimize
11140 || reg_overlap_mentioned_p (dest, op_true)
11141 || reg_overlap_mentioned_p (dest, op_false))
11142 dest = gen_reg_rtx (mode);
11144 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11145 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11147 return dest;
11150 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11151 operations. This is used for both scalar and vector conditional moves. */
11153 static void
11154 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11156 enum machine_mode mode = GET_MODE (dest);
11157 rtx t2, t3, x;
11159 if (op_false == CONST0_RTX (mode))
11161 op_true = force_reg (mode, op_true);
11162 x = gen_rtx_AND (mode, cmp, op_true);
11163 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11165 else if (op_true == CONST0_RTX (mode))
11167 op_false = force_reg (mode, op_false);
11168 x = gen_rtx_NOT (mode, cmp);
11169 x = gen_rtx_AND (mode, x, op_false);
11170 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11172 else
11174 op_true = force_reg (mode, op_true);
11175 op_false = force_reg (mode, op_false);
11177 t2 = gen_reg_rtx (mode);
11178 if (optimize)
11179 t3 = gen_reg_rtx (mode);
11180 else
11181 t3 = dest;
11183 x = gen_rtx_AND (mode, op_true, cmp);
11184 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11186 x = gen_rtx_NOT (mode, cmp);
11187 x = gen_rtx_AND (mode, x, op_false);
11188 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11190 x = gen_rtx_IOR (mode, t3, t2);
11191 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
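/* Added note: the three-operand case above is the classic mask blend
   dest = (cmp & op_true) | (~cmp & op_false), which relies on the SSE
   compare result being all-ones or all-zeros per element.  */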
11195 /* Expand a floating-point conditional move. Return true if successful. */
11198 ix86_expand_fp_movcc (rtx operands[])
11200 enum machine_mode mode = GET_MODE (operands[0]);
11201 enum rtx_code code = GET_CODE (operands[1]);
11202 rtx tmp, compare_op, second_test, bypass_test;
11204 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11206 enum machine_mode cmode;
11208 /* Since we've no cmove for sse registers, don't force bad register
11209 allocation just to gain access to it. Deny movcc when the
11210 comparison mode doesn't match the move mode. */
11211 cmode = GET_MODE (ix86_compare_op0);
11212 if (cmode == VOIDmode)
11213 cmode = GET_MODE (ix86_compare_op1);
11214 if (cmode != mode)
11215 return 0;
11217 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11218 &ix86_compare_op0,
11219 &ix86_compare_op1);
11220 if (code == UNKNOWN)
11221 return 0;
11223 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11224 ix86_compare_op1, operands[2],
11225 operands[3]))
11226 return 1;
11228 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11229 ix86_compare_op1, operands[2], operands[3]);
11230 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11231 return 1;
11234 /* The floating point conditional move instructions don't directly
11235 support conditions resulting from a signed integer comparison. */
11237 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11239 /* If the comparison is not directly representable as an fcmov condition,
11240 compute it into a QImode register and test that against zero instead. */
11242 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11244 gcc_assert (!second_test && !bypass_test);
11245 tmp = gen_reg_rtx (QImode);
11246 ix86_expand_setcc (code, tmp);
11247 code = NE;
11248 ix86_compare_op0 = tmp;
11249 ix86_compare_op1 = const0_rtx;
11250 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11252 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11254 tmp = gen_reg_rtx (mode);
11255 emit_move_insn (tmp, operands[3]);
11256 operands[3] = tmp;
11258 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11260 tmp = gen_reg_rtx (mode);
11261 emit_move_insn (tmp, operands[2]);
11262 operands[2] = tmp;
11265 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11266 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11267 operands[2], operands[3])));
11268 if (bypass_test)
11269 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11270 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11271 operands[3], operands[0])));
11272 if (second_test)
11273 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11274 gen_rtx_IF_THEN_ELSE (mode, second_test,
11275 operands[2], operands[0])));
11277 return 1;
11280 /* Expand a floating-point vector conditional move; a vcond operation
11281 rather than a movcc operation. */
11283 bool
11284 ix86_expand_fp_vcond (rtx operands[])
11286 enum rtx_code code = GET_CODE (operands[3]);
11287 rtx cmp;
11289 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11290 &operands[4], &operands[5]);
11291 if (code == UNKNOWN)
11292 return false;
11294 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11295 operands[5], operands[1], operands[2]))
11296 return true;
11298 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11299 operands[1], operands[2]);
11300 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11301 return true;
11304 /* Expand a signed integral vector conditional move. */
11306 bool
11307 ix86_expand_int_vcond (rtx operands[])
11309 enum machine_mode mode = GET_MODE (operands[0]);
11310 enum rtx_code code = GET_CODE (operands[3]);
11311 bool negate = false;
11312 rtx x, cop0, cop1;
11314 cop0 = operands[4];
11315 cop1 = operands[5];
11317 /* Canonicalize the comparison to EQ, GT, GTU. */
11318 switch (code)
11320 case EQ:
11321 case GT:
11322 case GTU:
11323 break;
11325 case NE:
11326 case LE:
11327 case LEU:
11328 code = reverse_condition (code);
11329 negate = true;
11330 break;
11332 case GE:
11333 case GEU:
11334 code = reverse_condition (code);
11335 negate = true;
11336 /* FALLTHRU */
11338 case LT:
11339 case LTU:
11340 code = swap_condition (code);
11341 x = cop0, cop0 = cop1, cop1 = x;
11342 break;
11344 default:
11345 gcc_unreachable ();
11348 /* Unsigned parallel compare is not supported by the hardware. Play some
11349 tricks to turn this into a signed comparison against 0. */
11350 if (code == GTU)
11352 switch (mode)
11354 case V4SImode:
11356 rtx t1, t2, mask;
11358 /* Perform a parallel modulo subtraction. */
11359 t1 = gen_reg_rtx (mode);
11360 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11362 /* Extract the original sign bit of op0. */
11363 mask = GEN_INT (-0x80000000);
11364 mask = gen_rtx_CONST_VECTOR (mode,
11365 gen_rtvec (4, mask, mask, mask, mask));
11366 mask = force_reg (mode, mask);
11367 t2 = gen_reg_rtx (mode);
11368 emit_insn (gen_andv4si3 (t2, cop0, mask));
11370 /* XOR it back into the result of the subtraction. This results
11371 in the sign bit set iff we saw unsigned underflow. */
11372 x = gen_reg_rtx (mode);
11373 emit_insn (gen_xorv4si3 (x, t1, t2));
11375 code = GT;
11377 break;
11379 case V16QImode:
11380 case V8HImode:
11381 /* Perform a parallel unsigned saturating subtraction. */
11382 x = gen_reg_rtx (mode);
11383 emit_insn (gen_rtx_SET (VOIDmode, x,
11384 gen_rtx_US_MINUS (mode, cop0, cop1)));
11386 code = EQ;
11387 negate = !negate;
11388 break;
11390 default:
11391 gcc_unreachable ();
11394 cop0 = x;
11395 cop1 = CONST0_RTX (mode);
11398 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11399 operands[1+negate], operands[2-negate]);
11401 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11402 operands[2-negate]);
11403 return true;
11406 /* Expand conditional increment or decrement using adc/sbb instructions.
11407 The default case using setcc followed by the conditional move can be
11408 done by generic code. */
11410 ix86_expand_int_addcc (rtx operands[])
11412 enum rtx_code code = GET_CODE (operands[1]);
11413 rtx compare_op;
11414 rtx val = const0_rtx;
11415 bool fpcmp = false;
11416 enum machine_mode mode = GET_MODE (operands[0]);
11418 if (operands[3] != const1_rtx
11419 && operands[3] != constm1_rtx)
11420 return 0;
11421 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11422 ix86_compare_op1, &compare_op))
11423 return 0;
11424 code = GET_CODE (compare_op);
11426 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11427 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11429 fpcmp = true;
11430 code = ix86_fp_compare_code_to_integer (code);
11433 if (code != LTU)
11435 val = constm1_rtx;
11436 if (fpcmp)
11437 PUT_CODE (compare_op,
11438 reverse_condition_maybe_unordered
11439 (GET_CODE (compare_op)));
11440 else
11441 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11443 PUT_MODE (compare_op, mode);
11445 /* Construct either adc or sbb insn. */
11446 if ((code == LTU) == (operands[3] == constm1_rtx))
11448 switch (GET_MODE (operands[0]))
11450 case QImode:
11451 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11452 break;
11453 case HImode:
11454 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11455 break;
11456 case SImode:
11457 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11458 break;
11459 case DImode:
11460 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11461 break;
11462 default:
11463 gcc_unreachable ();
11466 else
11468 switch (GET_MODE (operands[0]))
11470 case QImode:
11471 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11472 break;
11473 case HImode:
11474 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11475 break;
11476 case SImode:
11477 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11478 break;
11479 case DImode:
11480 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11481 break;
11482 default:
11483 gcc_unreachable ();
11486 return 1; /* DONE */
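/* Added example: "x + (a < b)" with an unsigned compare becomes roughly
       cmpl  b, a
       adcl  $0, x
   while the decrement flavour uses sbbl; only the carry flag produced
   by ix86_expand_carry_flag_compare is consumed.  */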
11490 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11491 works for floating point parameters and non-offsettable memories.
11492 For pushes, it returns just stack offsets; the values will be saved
11493 in the right order. At most three parts are generated. */
11495 static int
11496 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11498 int size;
11500 if (!TARGET_64BIT)
11501 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11502 else
11503 size = (GET_MODE_SIZE (mode) + 4) / 8;
11505 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11506 gcc_assert (size >= 2 && size <= 3);
11508 /* Optimize constant pool reference to immediates. This is used by fp
11509 moves, which force all constants to memory to allow combining. */
11510 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11512 rtx tmp = maybe_get_pool_constant (operand);
11513 if (tmp)
11514 operand = tmp;
11517 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11519 /* The only non-offsettable memories we handle are pushes. */
11520 int ok = push_operand (operand, VOIDmode);
11522 gcc_assert (ok);
11524 operand = copy_rtx (operand);
11525 PUT_MODE (operand, Pmode);
11526 parts[0] = parts[1] = parts[2] = operand;
11527 return size;
11530 if (GET_CODE (operand) == CONST_VECTOR)
11532 enum machine_mode imode = int_mode_for_mode (mode);
11533 /* Caution: if we looked through a constant pool memory above,
11534 the operand may actually have a different mode now. That's
11535 ok, since we want to pun this all the way back to an integer. */
11536 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11537 gcc_assert (operand != NULL);
11538 mode = imode;
11541 if (!TARGET_64BIT)
11543 if (mode == DImode)
11544 split_di (&operand, 1, &parts[0], &parts[1]);
11545 else
11547 if (REG_P (operand))
11549 gcc_assert (reload_completed);
11550 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11551 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11552 if (size == 3)
11553 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11555 else if (offsettable_memref_p (operand))
11557 operand = adjust_address (operand, SImode, 0);
11558 parts[0] = operand;
11559 parts[1] = adjust_address (operand, SImode, 4);
11560 if (size == 3)
11561 parts[2] = adjust_address (operand, SImode, 8);
11563 else if (GET_CODE (operand) == CONST_DOUBLE)
11565 REAL_VALUE_TYPE r;
11566 long l[4];
11568 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11569 switch (mode)
11571 case XFmode:
11572 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11573 parts[2] = gen_int_mode (l[2], SImode);
11574 break;
11575 case DFmode:
11576 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11577 break;
11578 default:
11579 gcc_unreachable ();
11581 parts[1] = gen_int_mode (l[1], SImode);
11582 parts[0] = gen_int_mode (l[0], SImode);
11584 else
11585 gcc_unreachable ();
11588 else
11590 if (mode == TImode)
11591 split_ti (&operand, 1, &parts[0], &parts[1]);
11592 if (mode == XFmode || mode == TFmode)
11594 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11595 if (REG_P (operand))
11597 gcc_assert (reload_completed);
11598 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11599 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11601 else if (offsettable_memref_p (operand))
11603 operand = adjust_address (operand, DImode, 0);
11604 parts[0] = operand;
11605 parts[1] = adjust_address (operand, upper_mode, 8);
11607 else if (GET_CODE (operand) == CONST_DOUBLE)
11609 REAL_VALUE_TYPE r;
11610 long l[4];
11612 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11613 real_to_target (l, &r, mode);
11615 /* Do not use a shift by 32, to avoid a warning on 32-bit hosts. */
11616 if (HOST_BITS_PER_WIDE_INT >= 64)
11617 parts[0]
11618 = gen_int_mode
11619 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11620 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11621 DImode);
11622 else
11623 parts[0] = immed_double_const (l[0], l[1], DImode);
11625 if (upper_mode == SImode)
11626 parts[1] = gen_int_mode (l[2], SImode);
11627 else if (HOST_BITS_PER_WIDE_INT >= 64)
11628 parts[1]
11629 = gen_int_mode
11630 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11631 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11632 DImode);
11633 else
11634 parts[1] = immed_double_const (l[2], l[3], DImode);
11636 else
11637 gcc_unreachable ();
11641 return size;
11644 /* Emit insns to perform a move or push of DI, DF, and XF values.
11645 All required insns are emitted here; nothing needs to be returned.
11646 Operands 2-4 contain the input values
11647 in the correct order; operands 5-7 contain the output values. */
11649 void
11650 ix86_split_long_move (rtx operands[])
11652 rtx part[2][3];
11653 int nparts;
11654 int push = 0;
11655 int collisions = 0;
11656 enum machine_mode mode = GET_MODE (operands[0]);
11658 /* The DFmode expanders may ask us to move a double.
11659 For a 64-bit target this is a single move. By hiding that fact
11660 here we simplify the i386.md splitters. */
11661 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11663 /* Optimize constant pool reference to immediates. This is used by
11664 fp moves, that force all constants to memory to allow combining. */
11666 if (GET_CODE (operands[1]) == MEM
11667 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11668 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11669 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11670 if (push_operand (operands[0], VOIDmode))
11672 operands[0] = copy_rtx (operands[0]);
11673 PUT_MODE (operands[0], Pmode);
11675 else
11676 operands[0] = gen_lowpart (DImode, operands[0]);
11677 operands[1] = gen_lowpart (DImode, operands[1]);
11678 emit_move_insn (operands[0], operands[1]);
11679 return;
11682 /* The only non-offsettable memory we handle is push. */
11683 if (push_operand (operands[0], VOIDmode))
11684 push = 1;
11685 else
11686 gcc_assert (GET_CODE (operands[0]) != MEM
11687 || offsettable_memref_p (operands[0]));
11689 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11690 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11692 /* When emitting a push, take care of source operands living on the stack. */
11693 if (push && GET_CODE (operands[1]) == MEM
11694 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11696 if (nparts == 3)
11697 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11698 XEXP (part[1][2], 0));
11699 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11700 XEXP (part[1][1], 0));
11703 /* We need to do the copy in the right order in case an address register
11704 of the source overlaps the destination. */
11705 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11707 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11708 collisions++;
11709 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11710 collisions++;
11711 if (nparts == 3
11712 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11713 collisions++;
11715 /* Collision in the middle part can be handled by reordering. */
11716 if (collisions == 1 && nparts == 3
11717 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11719 rtx tmp;
11720 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11721 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11724 /* If there are more collisions, we can't handle them by reordering.
11725 Do an lea to the last part and use only one colliding move. */
11726 else if (collisions > 1)
11728 rtx base;
11730 collisions = 1;
11732 base = part[0][nparts - 1];
11734 /* Handle the case when the last part isn't valid for lea.
11735 Happens in 64-bit mode storing the 12-byte XFmode. */
11736 if (GET_MODE (base) != Pmode)
11737 base = gen_rtx_REG (Pmode, REGNO (base));
11739 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11740 part[1][0] = replace_equiv_address (part[1][0], base);
11741 part[1][1] = replace_equiv_address (part[1][1],
11742 plus_constant (base, UNITS_PER_WORD));
11743 if (nparts == 3)
11744 part[1][2] = replace_equiv_address (part[1][2],
11745 plus_constant (base, 8));
11749 if (push)
11751 if (!TARGET_64BIT)
11753 if (nparts == 3)
11755 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11756 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11757 emit_move_insn (part[0][2], part[1][2]);
11760 else
11762 /* In 64-bit mode we don't have a 32-bit push available. If this is a
11763 register, that is OK - we will just use the larger counterpart. We also
11764 retype the memory - this comes from an attempt to avoid a REX prefix
11765 on the move of the second half of a TFmode value. */
11766 if (GET_MODE (part[1][1]) == SImode)
11768 switch (GET_CODE (part[1][1]))
11770 case MEM:
11771 part[1][1] = adjust_address (part[1][1], DImode, 0);
11772 break;
11774 case REG:
11775 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11776 break;
11778 default:
11779 gcc_unreachable ();
11782 if (GET_MODE (part[1][0]) == SImode)
11783 part[1][0] = part[1][1];
11786 emit_move_insn (part[0][1], part[1][1]);
11787 emit_move_insn (part[0][0], part[1][0]);
11788 return;
11791 /* Choose the correct order so as not to overwrite the source before it is copied. */
11792 if ((REG_P (part[0][0])
11793 && REG_P (part[1][1])
11794 && (REGNO (part[0][0]) == REGNO (part[1][1])
11795 || (nparts == 3
11796 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11797 || (collisions > 0
11798 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11800 if (nparts == 3)
11802 operands[2] = part[0][2];
11803 operands[3] = part[0][1];
11804 operands[4] = part[0][0];
11805 operands[5] = part[1][2];
11806 operands[6] = part[1][1];
11807 operands[7] = part[1][0];
11809 else
11811 operands[2] = part[0][1];
11812 operands[3] = part[0][0];
11813 operands[5] = part[1][1];
11814 operands[6] = part[1][0];
11817 else
11819 if (nparts == 3)
11821 operands[2] = part[0][0];
11822 operands[3] = part[0][1];
11823 operands[4] = part[0][2];
11824 operands[5] = part[1][0];
11825 operands[6] = part[1][1];
11826 operands[7] = part[1][2];
11828 else
11830 operands[2] = part[0][0];
11831 operands[3] = part[0][1];
11832 operands[5] = part[1][0];
11833 operands[6] = part[1][1];
11837 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11838 if (optimize_size)
11840 if (GET_CODE (operands[5]) == CONST_INT
11841 && operands[5] != const0_rtx
11842 && REG_P (operands[2]))
11844 if (GET_CODE (operands[6]) == CONST_INT
11845 && INTVAL (operands[6]) == INTVAL (operands[5]))
11846 operands[6] = operands[2];
11848 if (nparts == 3
11849 && GET_CODE (operands[7]) == CONST_INT
11850 && INTVAL (operands[7]) == INTVAL (operands[5]))
11851 operands[7] = operands[2];
11854 if (nparts == 3
11855 && GET_CODE (operands[6]) == CONST_INT
11856 && operands[6] != const0_rtx
11857 && REG_P (operands[3])
11858 && GET_CODE (operands[7]) == CONST_INT
11859 && INTVAL (operands[7]) == INTVAL (operands[6]))
11860 operands[7] = operands[3];
11863 emit_move_insn (operands[2], operands[5]);
11864 emit_move_insn (operands[3], operands[6]);
11865 if (nparts == 3)
11866 emit_move_insn (operands[4], operands[7]);
11868 return;
11871 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11872 left shift by a constant, either using a single shift or
11873 a sequence of add instructions. */
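/* For example, a shift count of 1 becomes a single add (x + x), and when
   not optimizing for size a small constant count may be expanded into that
   many adds if their total cost stays below the cost of one constant shift;
   otherwise a plain shift instruction is emitted.  */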
11875 static void
11876 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11878 if (count == 1)
11880 emit_insn ((mode == DImode
11881 ? gen_addsi3
11882 : gen_adddi3) (operand, operand, operand));
11884 else if (!optimize_size
11885 && count * ix86_cost->add <= ix86_cost->shift_const)
11887 int i;
11888 for (i=0; i<count; i++)
11890 emit_insn ((mode == DImode
11891 ? gen_addsi3
11892 : gen_adddi3) (operand, operand, operand));
11895 else
11896 emit_insn ((mode == DImode
11897 ? gen_ashlsi3
11898 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11901 void
11902 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11904 rtx low[2], high[2];
11905 int count;
11906 const int single_width = mode == DImode ? 32 : 64;
11908 if (GET_CODE (operands[2]) == CONST_INT)
11910 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11911 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11913 if (count >= single_width)
11915 emit_move_insn (high[0], low[1]);
11916 emit_move_insn (low[0], const0_rtx);
11918 if (count > single_width)
11919 ix86_expand_ashl_const (high[0], count - single_width, mode);
11921 else
11923 if (!rtx_equal_p (operands[0], operands[1]))
11924 emit_move_insn (operands[0], operands[1]);
11925 emit_insn ((mode == DImode
11926 ? gen_x86_shld_1
11927 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11928 ix86_expand_ashl_const (low[0], count, mode);
11930 return;
11933 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11935 if (operands[1] == const1_rtx)
11937 /* Assuming we've chosen QImode-capable registers, then 1 << N
11938 can be done with two 32/64-bit shifts, no branches, no cmoves. */
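/* For example, for a DImode shift on a 32-bit target with a runtime count
   of 40: bit 5 of the count is set, so the 1 lands in the high word and the
   low word stays 0; the final word-sized shifts then use 40 & 31 == 8,
   leaving high:low == 0x100:0, i.e. 1 << 40.  */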
11939 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11941 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11943 ix86_expand_clear (low[0]);
11944 ix86_expand_clear (high[0]);
11945 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11947 d = gen_lowpart (QImode, low[0]);
11948 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11949 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11950 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11952 d = gen_lowpart (QImode, high[0]);
11953 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11954 s = gen_rtx_NE (QImode, flags, const0_rtx);
11955 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11958 /* Otherwise, we can get the same results by manually performing
11959 a bit extract operation on bit 5/6, and then performing the two
11960 shifts. The two methods of getting 0/1 into low/high are exactly
11961 the same size. Avoiding the shift in the bit extract case helps
11962 pentium4 a bit; no one else seems to care much either way. */
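/* In other words, for DImode this computes high = (count >> 5) & 1 and
   low = high ^ 1 (bit 6 instead of bit 5 for TImode), followed by the same
   two word-sized shifts as above.  */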
11963 else
11965 rtx x;
11967 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11968 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
11969 else
11970 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
11971 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11973 emit_insn ((mode == DImode
11974 ? gen_lshrsi3
11975 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
11976 emit_insn ((mode == DImode
11977 ? gen_andsi3
11978 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
11979 emit_move_insn (low[0], high[0]);
11980 emit_insn ((mode == DImode
11981 ? gen_xorsi3
11982 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
11985 emit_insn ((mode == DImode
11986 ? gen_ashlsi3
11987 : gen_ashldi3) (low[0], low[0], operands[2]));
11988 emit_insn ((mode == DImode
11989 ? gen_ashlsi3
11990 : gen_ashldi3) (high[0], high[0], operands[2]));
11991 return;
11994 if (operands[1] == constm1_rtx)
11996 /* For -1 << N, we can avoid the shld instruction, because we
11997 know that we're shifting 0...31/63 ones into a -1. */
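/* That is, for counts below the word size the high word can simply start
   out as all ones (or as a copy of the low word when optimizing for size);
   counts of a word size or more are fixed up by the shift_adj code at the
   end of this function.  */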
11998 emit_move_insn (low[0], constm1_rtx);
11999 if (optimize_size)
12000 emit_move_insn (high[0], low[0]);
12001 else
12002 emit_move_insn (high[0], constm1_rtx);
12004 else
12006 if (!rtx_equal_p (operands[0], operands[1]))
12007 emit_move_insn (operands[0], operands[1]);
12009 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12010 emit_insn ((mode == DImode
12011 ? gen_x86_shld_1
12012 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12015 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12017 if (TARGET_CMOVE && scratch)
12019 ix86_expand_clear (scratch);
12020 emit_insn ((mode == DImode
12021 ? gen_x86_shift_adj_1
12022 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12024 else
12025 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12028 void
12029 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12031 rtx low[2], high[2];
12032 int count;
12033 const int single_width = mode == DImode ? 32 : 64;
12035 if (GET_CODE (operands[2]) == CONST_INT)
12037 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12038 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12040 if (count == single_width * 2 - 1)
12042 emit_move_insn (high[0], high[1]);
12043 emit_insn ((mode == DImode
12044 ? gen_ashrsi3
12045 : gen_ashrdi3) (high[0], high[0],
12046 GEN_INT (single_width - 1)));
12047 emit_move_insn (low[0], high[0]);
12050 else if (count >= single_width)
12052 emit_move_insn (low[0], high[1]);
12053 emit_move_insn (high[0], low[0]);
12054 emit_insn ((mode == DImode
12055 ? gen_ashrsi3
12056 : gen_ashrdi3) (high[0], high[0],
12057 GEN_INT (single_width - 1)));
12058 if (count > single_width)
12059 emit_insn ((mode == DImode
12060 ? gen_ashrsi3
12061 : gen_ashrdi3) (low[0], low[0],
12062 GEN_INT (count - single_width)));
12064 else
12066 if (!rtx_equal_p (operands[0], operands[1]))
12067 emit_move_insn (operands[0], operands[1]);
12068 emit_insn ((mode == DImode
12069 ? gen_x86_shrd_1
12070 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12071 emit_insn ((mode == DImode
12072 ? gen_ashrsi3
12073 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12076 else
12078 if (!rtx_equal_p (operands[0], operands[1]))
12079 emit_move_insn (operands[0], operands[1]);
12081 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12083 emit_insn ((mode == DImode
12084 ? gen_x86_shrd_1
12085 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12086 emit_insn ((mode == DImode
12087 ? gen_ashrsi3
12088 : gen_ashrdi3) (high[0], high[0], operands[2]));
12090 if (TARGET_CMOVE && scratch)
12092 emit_move_insn (scratch, high[0]);
12093 emit_insn ((mode == DImode
12094 ? gen_ashrsi3
12095 : gen_ashrdi3) (scratch, scratch,
12096 GEN_INT (single_width - 1)));
12097 emit_insn ((mode == DImode
12098 ? gen_x86_shift_adj_1
12099 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12100 scratch));
12102 else
12103 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12107 void
12108 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12110 rtx low[2], high[2];
12111 int count;
12112 const int single_width = mode == DImode ? 32 : 64;
12114 if (GET_CODE (operands[2]) == CONST_INT)
12116 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12117 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12119 if (count >= single_width)
12121 emit_move_insn (low[0], high[1]);
12122 ix86_expand_clear (high[0]);
12124 if (count > single_width)
12125 emit_insn ((mode == DImode
12126 ? gen_lshrsi3
12127 : gen_lshrdi3) (low[0], low[0],
12128 GEN_INT (count - single_width)));
12130 else
12132 if (!rtx_equal_p (operands[0], operands[1]))
12133 emit_move_insn (operands[0], operands[1]);
12134 emit_insn ((mode == DImode
12135 ? gen_x86_shrd_1
12136 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12137 emit_insn ((mode == DImode
12138 ? gen_lshrsi3
12139 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12142 else
12144 if (!rtx_equal_p (operands[0], operands[1]))
12145 emit_move_insn (operands[0], operands[1]);
12147 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12149 emit_insn ((mode == DImode
12150 ? gen_x86_shrd_1
12151 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12152 emit_insn ((mode == DImode
12153 ? gen_lshrsi3
12154 : gen_lshrdi3) (high[0], high[0], operands[2]));
12156 /* Heh. By reversing the arguments, we can reuse this pattern. */
12157 if (TARGET_CMOVE && scratch)
12159 ix86_expand_clear (scratch);
12160 emit_insn ((mode == DImode
12161 ? gen_x86_shift_adj_1
12162 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12163 scratch));
12165 else
12166 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12170 /* Helper function for the string operations below. Test whether VARIABLE
12171 is aligned to VALUE bytes; if so, the emitted code jumps to the returned label. */
12172 static rtx
12173 ix86_expand_aligntest (rtx variable, int value)
12175 rtx label = gen_label_rtx ();
12176 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12177 if (GET_MODE (variable) == DImode)
12178 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12179 else
12180 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12181 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12182 1, label);
12183 return label;
12186 /* Adjust COUNTER by the VALUE. */
12187 static void
12188 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12190 if (GET_MODE (countreg) == DImode)
12191 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12192 else
12193 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12196 /* Zero extend possibly SImode EXP to Pmode register. */
12198 ix86_zero_extend_to_Pmode (rtx exp)
12200 rtx r;
12201 if (GET_MODE (exp) == VOIDmode)
12202 return force_reg (Pmode, exp);
12203 if (GET_MODE (exp) == Pmode)
12204 return copy_to_mode_reg (Pmode, exp);
12205 r = gen_reg_rtx (Pmode);
12206 emit_insn (gen_zero_extendsidi2 (r, exp));
12207 return r;
12210 /* Expand string move (memcpy) operation. Use i386 string operations when
12211 profitable. expand_clrmem contains similar code. */
12213 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12215 rtx srcreg, destreg, countreg, srcexp, destexp;
12216 enum machine_mode counter_mode;
12217 HOST_WIDE_INT align = 0;
12218 unsigned HOST_WIDE_INT count = 0;
12220 if (GET_CODE (align_exp) == CONST_INT)
12221 align = INTVAL (align_exp);
12223 /* Can't use any of this if the user has appropriated esi or edi. */
12224 if (global_regs[4] || global_regs[5])
12225 return 0;
12227 /* This simple hack avoids all inlining code and simplifies code below. */
12228 if (!TARGET_ALIGN_STRINGOPS)
12229 align = 64;
12231 if (GET_CODE (count_exp) == CONST_INT)
12233 count = INTVAL (count_exp);
12234 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12235 return 0;
12238 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
12239 for 64 bits use SImode when possible, otherwise DImode.
12240 Set count to the number of bytes copied when known at compile time. */
12241 if (!TARGET_64BIT
12242 || GET_MODE (count_exp) == SImode
12243 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12244 counter_mode = SImode;
12245 else
12246 counter_mode = DImode;
12248 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12250 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12251 if (destreg != XEXP (dst, 0))
12252 dst = replace_equiv_address_nv (dst, destreg);
12253 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12254 if (srcreg != XEXP (src, 0))
12255 src = replace_equiv_address_nv (src, srcreg);
12257 /* When optimizing for size, emit a simple rep ; movsb instruction for
12258 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12259 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12260 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12261 count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
12262 depending on whether the upper 24 (resp. 56) bits of %ecx can be
12263 assumed to be zero, which we don't know here. The rep; movsb sequence
12264 causes higher register pressure though, so take that into account. */
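/* For example, with count == 11 when optimizing for size, the unrolled
   sequence costs roughly 11 / 4 + (11 & 3) == 5 bytes by the estimate
   above, which beats the 7-byte movl $11, %ecx; rep; movsb form, so the
   code below falls through to the unrolled copy.  */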
12266 if ((!optimize || optimize_size)
12267 && (count == 0
12268 || ((count & 0x03)
12269 && (!optimize_size
12270 || count > 5 * 4
12271 || (count & 3) + count / 4 > 6))))
12273 emit_insn (gen_cld ());
12274 countreg = ix86_zero_extend_to_Pmode (count_exp);
12275 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12276 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12277 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12278 destexp, srcexp));
12281 /* For constant aligned (or small unaligned) copies use rep movsl
12282 followed by code copying the rest. For PentiumPro ensure 8 byte
12283 alignment to allow rep movsl acceleration. */
12285 else if (count != 0
12286 && (align >= 8
12287 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12288 || optimize_size || count < (unsigned int) 64))
12290 unsigned HOST_WIDE_INT offset = 0;
12291 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12292 rtx srcmem, dstmem;
12294 emit_insn (gen_cld ());
12295 if (count & ~(size - 1))
12297 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12299 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12301 while (offset < (count & ~(size - 1)))
12303 srcmem = adjust_automodify_address_nv (src, movs_mode,
12304 srcreg, offset);
12305 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12306 destreg, offset);
12307 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12308 offset += size;
12311 else
12313 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12314 & (TARGET_64BIT ? -1 : 0x3fffffff));
12315 countreg = copy_to_mode_reg (counter_mode, countreg);
12316 countreg = ix86_zero_extend_to_Pmode (countreg);
12318 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12319 GEN_INT (size == 4 ? 2 : 3));
12320 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12321 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12323 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12324 countreg, destexp, srcexp));
12325 offset = count & ~(size - 1);
12328 if (size == 8 && (count & 0x04))
12330 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12331 offset);
12332 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12333 offset);
12334 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12335 offset += 4;
12337 if (count & 0x02)
12339 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12340 offset);
12341 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12342 offset);
12343 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12344 offset += 2;
12346 if (count & 0x01)
12348 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12349 offset);
12350 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12351 offset);
12352 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12355 /* The generic code based on the glibc implementation:
12356 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12357 allowing accelerated copying there)
12358 - copy the data using rep movsl
12359 - copy the rest. */
12360 else
12362 rtx countreg2;
12363 rtx label = NULL;
12364 rtx srcmem, dstmem;
12365 int desired_alignment = (TARGET_PENTIUMPRO
12366 && (count == 0 || count >= (unsigned int) 260)
12367 ? 8 : UNITS_PER_WORD);
12368 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12369 dst = change_address (dst, BLKmode, destreg);
12370 src = change_address (src, BLKmode, srcreg);
12372 /* In case we don't know anything about the alignment, default to the
12373 library version, since it is usually equally fast and results in
12374 shorter code.
12376 Also emit a call when we know that the count is large and the call
12377 overhead will not be important. */
12378 if (!TARGET_INLINE_ALL_STRINGOPS
12379 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12380 return 0;
12382 if (TARGET_SINGLE_STRINGOP)
12383 emit_insn (gen_cld ());
12385 countreg2 = gen_reg_rtx (Pmode);
12386 countreg = copy_to_mode_reg (counter_mode, count_exp);
12388 /* We don't use loops to align the destination or to copy parts smaller
12389 than 4 bytes, because gcc is able to optimize such code better (in
12390 case the destination or the count really is aligned, gcc is often
12391 able to predict the branches) and also it is friendlier to the
12392 hardware branch prediction.
12394 Using loops is beneficial for the generic case, because we can
12395 handle small counts using the loops. Many CPUs (such as Athlon)
12396 have large REP prefix setup costs.
12398 This is quite costly. Maybe we can revisit this decision later or
12399 add some customizability to this code. */
12401 if (count == 0 && align < desired_alignment)
12403 label = gen_label_rtx ();
12404 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12405 LEU, 0, counter_mode, 1, label);
12407 if (align <= 1)
12409 rtx label = ix86_expand_aligntest (destreg, 1);
12410 srcmem = change_address (src, QImode, srcreg);
12411 dstmem = change_address (dst, QImode, destreg);
12412 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12413 ix86_adjust_counter (countreg, 1);
12414 emit_label (label);
12415 LABEL_NUSES (label) = 1;
12417 if (align <= 2)
12419 rtx label = ix86_expand_aligntest (destreg, 2);
12420 srcmem = change_address (src, HImode, srcreg);
12421 dstmem = change_address (dst, HImode, destreg);
12422 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12423 ix86_adjust_counter (countreg, 2);
12424 emit_label (label);
12425 LABEL_NUSES (label) = 1;
12427 if (align <= 4 && desired_alignment > 4)
12429 rtx label = ix86_expand_aligntest (destreg, 4);
12430 srcmem = change_address (src, SImode, srcreg);
12431 dstmem = change_address (dst, SImode, destreg);
12432 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12433 ix86_adjust_counter (countreg, 4);
12434 emit_label (label);
12435 LABEL_NUSES (label) = 1;
12438 if (label && desired_alignment > 4 && !TARGET_64BIT)
12440 emit_label (label);
12441 LABEL_NUSES (label) = 1;
12442 label = NULL_RTX;
12444 if (!TARGET_SINGLE_STRINGOP)
12445 emit_insn (gen_cld ());
12446 if (TARGET_64BIT)
12448 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12449 GEN_INT (3)));
12450 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12452 else
12454 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12455 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12457 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12458 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12459 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12460 countreg2, destexp, srcexp));
12462 if (label)
12464 emit_label (label);
12465 LABEL_NUSES (label) = 1;
12467 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12469 srcmem = change_address (src, SImode, srcreg);
12470 dstmem = change_address (dst, SImode, destreg);
12471 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12473 if ((align <= 4 || count == 0) && TARGET_64BIT)
12475 rtx label = ix86_expand_aligntest (countreg, 4);
12476 srcmem = change_address (src, SImode, srcreg);
12477 dstmem = change_address (dst, SImode, destreg);
12478 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12479 emit_label (label);
12480 LABEL_NUSES (label) = 1;
12482 if (align > 2 && count != 0 && (count & 2))
12484 srcmem = change_address (src, HImode, srcreg);
12485 dstmem = change_address (dst, HImode, destreg);
12486 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12488 if (align <= 2 || count == 0)
12490 rtx label = ix86_expand_aligntest (countreg, 2);
12491 srcmem = change_address (src, HImode, srcreg);
12492 dstmem = change_address (dst, HImode, destreg);
12493 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12494 emit_label (label);
12495 LABEL_NUSES (label) = 1;
12497 if (align > 1 && count != 0 && (count & 1))
12499 srcmem = change_address (src, QImode, srcreg);
12500 dstmem = change_address (dst, QImode, destreg);
12501 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12503 if (align <= 1 || count == 0)
12505 rtx label = ix86_expand_aligntest (countreg, 1);
12506 srcmem = change_address (src, QImode, srcreg);
12507 dstmem = change_address (dst, QImode, destreg);
12508 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12509 emit_label (label);
12510 LABEL_NUSES (label) = 1;
12514 return 1;
12517 /* Expand string clear operation (bzero). Use i386 string operations when
12518 profitable. expand_movmem contains similar code. */
12520 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12522 rtx destreg, zeroreg, countreg, destexp;
12523 enum machine_mode counter_mode;
12524 HOST_WIDE_INT align = 0;
12525 unsigned HOST_WIDE_INT count = 0;
12527 if (GET_CODE (align_exp) == CONST_INT)
12528 align = INTVAL (align_exp);
12530 /* Can't use any of this if the user has appropriated esi. */
12531 if (global_regs[4])
12532 return 0;
12534 /* This simple hack avoids all inlining code and simplifies code below. */
12535 if (!TARGET_ALIGN_STRINGOPS)
12536 align = 32;
12538 if (GET_CODE (count_exp) == CONST_INT)
12540 count = INTVAL (count_exp);
12541 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12542 return 0;
12544 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
12545 for 64 bits use SImode when possible, otherwise DImode.
12546 Set count to the number of bytes cleared when known at compile time. */
12547 if (!TARGET_64BIT
12548 || GET_MODE (count_exp) == SImode
12549 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12550 counter_mode = SImode;
12551 else
12552 counter_mode = DImode;
12554 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12555 if (destreg != XEXP (dst, 0))
12556 dst = replace_equiv_address_nv (dst, destreg);
12559 /* When optimizing for size, emit a simple rep ; stosb instruction for
12560 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12561 sequence is 7 bytes long, so if optimizing for size and the count is
12562 small enough that some stosl, stosw and stosb instructions without
12563 rep are shorter, fall back into the next if. */
12565 if ((!optimize || optimize_size)
12566 && (count == 0
12567 || ((count & 0x03)
12568 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12570 emit_insn (gen_cld ());
12572 countreg = ix86_zero_extend_to_Pmode (count_exp);
12573 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12574 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12575 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12577 else if (count != 0
12578 && (align >= 8
12579 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12580 || optimize_size || count < (unsigned int) 64))
12582 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12583 unsigned HOST_WIDE_INT offset = 0;
12585 emit_insn (gen_cld ());
12587 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12588 if (count & ~(size - 1))
12590 unsigned HOST_WIDE_INT repcount;
12591 unsigned int max_nonrep;
12593 repcount = count >> (size == 4 ? 2 : 3);
12594 if (!TARGET_64BIT)
12595 repcount &= 0x3fffffff;
12597 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12598 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12599 bytes. In both cases the latter seems to be faster for small
12600 values of N. */
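/* For instance, clearing 16 bytes with four inline stosl instructions is
   4 bytes of code versus 7 for the rep form, so sequences of up to
   max_nonrep stores are emitted without the rep prefix.  */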
12601 max_nonrep = size == 4 ? 7 : 4;
12602 if (!optimize_size)
12603 switch (ix86_tune)
12605 case PROCESSOR_PENTIUM4:
12606 case PROCESSOR_NOCONA:
12607 max_nonrep = 3;
12608 break;
12609 default:
12610 break;
12613 if (repcount <= max_nonrep)
12614 while (repcount-- > 0)
12616 rtx mem = adjust_automodify_address_nv (dst,
12617 GET_MODE (zeroreg),
12618 destreg, offset);
12619 emit_insn (gen_strset (destreg, mem, zeroreg));
12620 offset += size;
12622 else
12624 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12625 countreg = ix86_zero_extend_to_Pmode (countreg);
12626 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12627 GEN_INT (size == 4 ? 2 : 3));
12628 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12629 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12630 destexp));
12631 offset = count & ~(size - 1);
12634 if (size == 8 && (count & 0x04))
12636 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12637 offset);
12638 emit_insn (gen_strset (destreg, mem,
12639 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12640 offset += 4;
12642 if (count & 0x02)
12644 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12645 offset);
12646 emit_insn (gen_strset (destreg, mem,
12647 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12648 offset += 2;
12650 if (count & 0x01)
12652 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12653 offset);
12654 emit_insn (gen_strset (destreg, mem,
12655 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12658 else
12660 rtx countreg2;
12661 rtx label = NULL;
12662 /* Compute desired alignment of the string operation. */
12663 int desired_alignment = (TARGET_PENTIUMPRO
12664 && (count == 0 || count >= (unsigned int) 260)
12665 ? 8 : UNITS_PER_WORD);
12667 /* In case we don't know anything about the alignment, default to the
12668 library version, since it is usually equally fast and results in
12669 shorter code.
12671 Also emit a call when we know that the count is large and the call
12672 overhead will not be important. */
12673 if (!TARGET_INLINE_ALL_STRINGOPS
12674 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12675 return 0;
12677 if (TARGET_SINGLE_STRINGOP)
12678 emit_insn (gen_cld ());
12680 countreg2 = gen_reg_rtx (Pmode);
12681 countreg = copy_to_mode_reg (counter_mode, count_exp);
12682 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12683 /* Get rid of MEM_OFFSET, it won't be accurate. */
12684 dst = change_address (dst, BLKmode, destreg);
12686 if (count == 0 && align < desired_alignment)
12688 label = gen_label_rtx ();
12689 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12690 LEU, 0, counter_mode, 1, label);
12692 if (align <= 1)
12694 rtx label = ix86_expand_aligntest (destreg, 1);
12695 emit_insn (gen_strset (destreg, dst,
12696 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12697 ix86_adjust_counter (countreg, 1);
12698 emit_label (label);
12699 LABEL_NUSES (label) = 1;
12701 if (align <= 2)
12703 rtx label = ix86_expand_aligntest (destreg, 2);
12704 emit_insn (gen_strset (destreg, dst,
12705 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12706 ix86_adjust_counter (countreg, 2);
12707 emit_label (label);
12708 LABEL_NUSES (label) = 1;
12710 if (align <= 4 && desired_alignment > 4)
12712 rtx label = ix86_expand_aligntest (destreg, 4);
12713 emit_insn (gen_strset (destreg, dst,
12714 (TARGET_64BIT
12715 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12716 : zeroreg)));
12717 ix86_adjust_counter (countreg, 4);
12718 emit_label (label);
12719 LABEL_NUSES (label) = 1;
12722 if (label && desired_alignment > 4 && !TARGET_64BIT)
12724 emit_label (label);
12725 LABEL_NUSES (label) = 1;
12726 label = NULL_RTX;
12729 if (!TARGET_SINGLE_STRINGOP)
12730 emit_insn (gen_cld ());
12731 if (TARGET_64BIT)
12733 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12734 GEN_INT (3)));
12735 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12737 else
12739 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12740 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12742 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12743 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12745 if (label)
12747 emit_label (label);
12748 LABEL_NUSES (label) = 1;
12751 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12752 emit_insn (gen_strset (destreg, dst,
12753 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12754 if (TARGET_64BIT && (align <= 4 || count == 0))
12756 rtx label = ix86_expand_aligntest (countreg, 4);
12757 emit_insn (gen_strset (destreg, dst,
12758 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12759 emit_label (label);
12760 LABEL_NUSES (label) = 1;
12762 if (align > 2 && count != 0 && (count & 2))
12763 emit_insn (gen_strset (destreg, dst,
12764 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12765 if (align <= 2 || count == 0)
12767 rtx label = ix86_expand_aligntest (countreg, 2);
12768 emit_insn (gen_strset (destreg, dst,
12769 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12770 emit_label (label);
12771 LABEL_NUSES (label) = 1;
12773 if (align > 1 && count != 0 && (count & 1))
12774 emit_insn (gen_strset (destreg, dst,
12775 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12776 if (align <= 1 || count == 0)
12778 rtx label = ix86_expand_aligntest (countreg, 1);
12779 emit_insn (gen_strset (destreg, dst,
12780 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12781 emit_label (label);
12782 LABEL_NUSES (label) = 1;
12785 return 1;
12788 /* Expand strlen. */
12790 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12792 rtx addr, scratch1, scratch2, scratch3, scratch4;
12794 /* The generic case of the strlen expander is long. Avoid its
12795 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
12797 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12798 && !TARGET_INLINE_ALL_STRINGOPS
12799 && !optimize_size
12800 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12801 return 0;
12803 addr = force_reg (Pmode, XEXP (src, 0));
12804 scratch1 = gen_reg_rtx (Pmode);
12806 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12807 && !optimize_size)
12809 /* Well, it seems that some optimizer does not combine a call like
12810 foo(strlen(bar), strlen(bar));
12811 when the move and the subtraction are done here. It does calculate
12812 the length just once when these instructions are done inside of
12813 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
12814 often used and I use one fewer register for the lifetime of
12815 output_strlen_unroll() this is better. */
12817 emit_move_insn (out, addr);
12819 ix86_expand_strlensi_unroll_1 (out, src, align);
12821 /* strlensi_unroll_1 returns the address of the zero at the end of
12822 the string, like memchr(), so compute the length by subtracting
12823 the start address. */
12824 if (TARGET_64BIT)
12825 emit_insn (gen_subdi3 (out, out, addr));
12826 else
12827 emit_insn (gen_subsi3 (out, out, addr));
12829 else
12831 rtx unspec;
12832 scratch2 = gen_reg_rtx (Pmode);
12833 scratch3 = gen_reg_rtx (Pmode);
12834 scratch4 = force_reg (Pmode, constm1_rtx);
12836 emit_move_insn (scratch3, addr);
12837 eoschar = force_reg (QImode, eoschar);
12839 emit_insn (gen_cld ());
12840 src = replace_equiv_address_nv (src, scratch3);
12842 /* If .md starts supporting :P, this can be done in .md. */
12843 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12844 scratch4), UNSPEC_SCAS);
12845 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12846 if (TARGET_64BIT)
12848 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12849 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12851 else
12853 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12854 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12857 return 1;
12860 /* Expand the appropriate insns for doing strlen if not just doing
12861 repnz; scasb
12863 out = result, initialized with the start address
12864 align_rtx = alignment of the address.
12865 scratch = scratch register, initialized with the start address when
12866 not aligned, otherwise undefined
12868 This is just the body. It needs the initializations mentioned above and
12869 some address computation at the end. These things are done in i386.md. */
12871 static void
12872 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12874 int align;
12875 rtx tmp;
12876 rtx align_2_label = NULL_RTX;
12877 rtx align_3_label = NULL_RTX;
12878 rtx align_4_label = gen_label_rtx ();
12879 rtx end_0_label = gen_label_rtx ();
12880 rtx mem;
12881 rtx tmpreg = gen_reg_rtx (SImode);
12882 rtx scratch = gen_reg_rtx (SImode);
12883 rtx cmp;
12885 align = 0;
12886 if (GET_CODE (align_rtx) == CONST_INT)
12887 align = INTVAL (align_rtx);
12889 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12891 /* Is there a known alignment and is it less than 4? */
12892 if (align < 4)
12894 rtx scratch1 = gen_reg_rtx (Pmode);
12895 emit_move_insn (scratch1, out);
12896 /* Is there a known alignment and is it not 2? */
12897 if (align != 2)
12899 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12900 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12902 /* Leave just the 3 lower bits. */
12903 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12904 NULL_RTX, 0, OPTAB_WIDEN);
12906 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12907 Pmode, 1, align_4_label);
12908 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12909 Pmode, 1, align_2_label);
12910 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12911 Pmode, 1, align_3_label);
12913 else
12915 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12916 check whether it is aligned to a 4-byte boundary. */
12918 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12919 NULL_RTX, 0, OPTAB_WIDEN);
12921 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12922 Pmode, 1, align_4_label);
12925 mem = change_address (src, QImode, out);
12927 /* Now compare the bytes. */
12929 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
12930 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12931 QImode, 1, end_0_label);
12933 /* Increment the address. */
12934 if (TARGET_64BIT)
12935 emit_insn (gen_adddi3 (out, out, const1_rtx));
12936 else
12937 emit_insn (gen_addsi3 (out, out, const1_rtx));
12939 /* Not needed with an alignment of 2 */
12940 if (align != 2)
12942 emit_label (align_2_label);
12944 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12945 end_0_label);
12947 if (TARGET_64BIT)
12948 emit_insn (gen_adddi3 (out, out, const1_rtx));
12949 else
12950 emit_insn (gen_addsi3 (out, out, const1_rtx));
12952 emit_label (align_3_label);
12955 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12956 end_0_label);
12958 if (TARGET_64BIT)
12959 emit_insn (gen_adddi3 (out, out, const1_rtx));
12960 else
12961 emit_insn (gen_addsi3 (out, out, const1_rtx));
12964 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
12965 align this loop; doing so only bloats the program and does not help
12966 to speed it up. */
12967 emit_label (align_4_label);
12969 mem = change_address (src, SImode, out);
12970 emit_move_insn (scratch, mem);
12971 if (TARGET_64BIT)
12972 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12973 else
12974 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12976 /* This formula yields a nonzero result iff one of the bytes is zero.
12977 This saves three branches inside the loop and many cycles. */
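/* Concretely, tmpreg ends up as (x - 0x01010101) & ~x & 0x80808080.
   For x == 0x40302000 (low byte zero) this is
   0x3f2f1eff & 0xbfcfdfff & 0x80808080 == 0x00000080, so the loop exits,
   while for x == 0x12345678 it is 0 and the loop continues.  */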
12979 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12980 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12981 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12982 emit_insn (gen_andsi3 (tmpreg, tmpreg,
12983 gen_int_mode (0x80808080, SImode)));
12984 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12985 align_4_label);
12987 if (TARGET_CMOVE)
12989 rtx reg = gen_reg_rtx (SImode);
12990 rtx reg2 = gen_reg_rtx (Pmode);
12991 emit_move_insn (reg, tmpreg);
12992 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12994 /* If zero is not in the first two bytes, move two bytes forward. */
12995 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12996 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12997 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12998 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12999 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13000 reg,
13001 tmpreg)));
13002 /* Emit lea manually to avoid clobbering of flags. */
13003 emit_insn (gen_rtx_SET (SImode, reg2,
13004 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13006 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13007 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13008 emit_insn (gen_rtx_SET (VOIDmode, out,
13009 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13010 reg2,
13011 out)));
13014 else
13016 rtx end_2_label = gen_label_rtx ();
13017 /* Is zero in the first two bytes? */
13019 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13020 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13021 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13022 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13023 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13024 pc_rtx);
13025 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13026 JUMP_LABEL (tmp) = end_2_label;
13028 /* Not in the first two. Move two bytes forward. */
13029 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13030 if (TARGET_64BIT)
13031 emit_insn (gen_adddi3 (out, out, const2_rtx));
13032 else
13033 emit_insn (gen_addsi3 (out, out, const2_rtx));
13035 emit_label (end_2_label);
13039 /* Avoid branch in fixing the byte. */
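/* At this point OUT is 4 past the start of the word (plus 2 more if the
   zero was in the upper half), and the low byte of TMPREG is 0x80 exactly
   when the first byte of the remaining pair is the zero.  Doubling that
   byte sets the carry in that case, so the sbb below subtracts either 4
   or 3, leaving OUT pointing at the terminating zero byte.  */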
13040 tmpreg = gen_lowpart (QImode, tmpreg);
13041 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13042 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13043 if (TARGET_64BIT)
13044 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13045 else
13046 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13048 emit_label (end_0_label);
13051 void
13052 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13053 rtx callarg2 ATTRIBUTE_UNUSED,
13054 rtx pop, int sibcall)
13056 rtx use = NULL, call;
13058 if (pop == const0_rtx)
13059 pop = NULL;
13060 gcc_assert (!TARGET_64BIT || !pop);
13062 #if TARGET_MACHO
13063 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13064 fnaddr = machopic_indirect_call_target (fnaddr);
13065 #else
13066 /* Static functions and indirect calls don't need the pic register. */
13067 if (! TARGET_64BIT && flag_pic
13068 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13069 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13070 use_reg (&use, pic_offset_table_rtx);
13072 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13074 rtx al = gen_rtx_REG (QImode, 0);
13075 emit_move_insn (al, callarg2);
13076 use_reg (&use, al);
13078 #endif /* TARGET_MACHO */
13080 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13082 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13083 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13085 if (sibcall && TARGET_64BIT
13086 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13088 rtx addr;
13089 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13090 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13091 emit_move_insn (fnaddr, addr);
13092 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13095 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13096 if (retval)
13097 call = gen_rtx_SET (VOIDmode, retval, call);
13098 if (pop)
13100 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13101 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13102 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13105 call = emit_call_insn (call);
13106 if (use)
13107 CALL_INSN_FUNCTION_USAGE (call) = use;
13111 /* Clear stack slot assignments remembered from previous functions.
13112 This is called from INIT_EXPANDERS once before RTL is emitted for each
13113 function. */
13115 static struct machine_function *
13116 ix86_init_machine_status (void)
13118 struct machine_function *f;
13120 f = ggc_alloc_cleared (sizeof (struct machine_function));
13121 f->use_fast_prologue_epilogue_nregs = -1;
13122 f->tls_descriptor_call_expanded_p = 0;
13124 return f;
13127 /* Return a MEM corresponding to a stack slot with mode MODE.
13128 Allocate a new slot if necessary.
13130 The RTL for a function can have several slots available: N is
13131 which slot to use. */
13134 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13136 struct stack_local_entry *s;
13138 gcc_assert (n < MAX_386_STACK_LOCALS);
13140 for (s = ix86_stack_locals; s; s = s->next)
13141 if (s->mode == mode && s->n == n)
13142 return s->rtl;
13144 s = (struct stack_local_entry *)
13145 ggc_alloc (sizeof (struct stack_local_entry));
13146 s->n = n;
13147 s->mode = mode;
13148 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13150 s->next = ix86_stack_locals;
13151 ix86_stack_locals = s;
13152 return s->rtl;
13155 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13157 static GTY(()) rtx ix86_tls_symbol;
13159 ix86_tls_get_addr (void)
13162 if (!ix86_tls_symbol)
13164 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13165 (TARGET_ANY_GNU_TLS
13166 && !TARGET_64BIT)
13167 ? "___tls_get_addr"
13168 : "__tls_get_addr");
13171 return ix86_tls_symbol;
13174 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13176 static GTY(()) rtx ix86_tls_module_base_symbol;
13178 ix86_tls_module_base (void)
13181 if (!ix86_tls_module_base_symbol)
13183 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13184 "_TLS_MODULE_BASE_");
13185 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13186 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13189 return ix86_tls_module_base_symbol;
13192 /* Calculate the length of the memory address in the instruction
13193 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13196 memory_address_length (rtx addr)
13198 struct ix86_address parts;
13199 rtx base, index, disp;
13200 int len;
13201 int ok;
13203 if (GET_CODE (addr) == PRE_DEC
13204 || GET_CODE (addr) == POST_INC
13205 || GET_CODE (addr) == PRE_MODIFY
13206 || GET_CODE (addr) == POST_MODIFY)
13207 return 0;
13209 ok = ix86_decompose_address (addr, &parts);
13210 gcc_assert (ok);
13212 if (parts.base && GET_CODE (parts.base) == SUBREG)
13213 parts.base = SUBREG_REG (parts.base);
13214 if (parts.index && GET_CODE (parts.index) == SUBREG)
13215 parts.index = SUBREG_REG (parts.index);
13217 base = parts.base;
13218 index = parts.index;
13219 disp = parts.disp;
13220 len = 0;
13222 /* Rule of thumb:
13223 - esp as the base always wants an index,
13224 - ebp as the base always wants a displacement. */
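/* For instance, (%esp) needs an extra SIB byte even without an index, and
   (%ebp) with no displacement still has to be encoded as 0(%ebp) with a
   one-byte displacement, because the short form of that encoding is taken
   by 32-bit absolute addressing.  */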
13226 /* Register Indirect. */
13227 if (base && !index && !disp)
13229 /* esp (for its index) and ebp (for its displacement) need
13230 the two-byte modrm form. */
13231 if (addr == stack_pointer_rtx
13232 || addr == arg_pointer_rtx
13233 || addr == frame_pointer_rtx
13234 || addr == hard_frame_pointer_rtx)
13235 len = 1;
13238 /* Direct Addressing. */
13239 else if (disp && !base && !index)
13240 len = 4;
13242 else
13244 /* Find the length of the displacement constant. */
13245 if (disp)
13247 if (GET_CODE (disp) == CONST_INT
13248 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
13249 && base)
13250 len = 1;
13251 else
13252 len = 4;
13254 /* ebp always wants a displacement. */
13255 else if (base == hard_frame_pointer_rtx)
13256 len = 1;
13258 /* An index requires the two-byte modrm form.... */
13259 if (index
13260 /* ...like esp, which always wants an index. */
13261 || base == stack_pointer_rtx
13262 || base == arg_pointer_rtx
13263 || base == frame_pointer_rtx)
13264 len += 1;
13267 return len;
13270 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13271 is set, expect that the insn has an 8-bit immediate alternative. */
13273 ix86_attr_length_immediate_default (rtx insn, int shortform)
13275 int len = 0;
13276 int i;
13277 extract_insn_cached (insn);
13278 for (i = recog_data.n_operands - 1; i >= 0; --i)
13279 if (CONSTANT_P (recog_data.operand[i]))
13281 gcc_assert (!len);
13282 if (shortform
13283 && GET_CODE (recog_data.operand[i]) == CONST_INT
13284 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
13285 len = 1;
13286 else
13288 switch (get_attr_mode (insn))
13290 case MODE_QI:
13291 len+=1;
13292 break;
13293 case MODE_HI:
13294 len+=2;
13295 break;
13296 case MODE_SI:
13297 len+=4;
13298 break;
13299 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13300 case MODE_DI:
13301 len+=4;
13302 break;
13303 default:
13304 fatal_insn ("unknown insn mode", insn);
13308 return len;
13310 /* Compute default value for "length_address" attribute. */
13312 ix86_attr_length_address_default (rtx insn)
13314 int i;
13316 if (get_attr_type (insn) == TYPE_LEA)
13318 rtx set = PATTERN (insn);
13320 if (GET_CODE (set) == PARALLEL)
13321 set = XVECEXP (set, 0, 0);
13323 gcc_assert (GET_CODE (set) == SET);
13325 return memory_address_length (SET_SRC (set));
13328 extract_insn_cached (insn);
13329 for (i = recog_data.n_operands - 1; i >= 0; --i)
13330 if (GET_CODE (recog_data.operand[i]) == MEM)
13332 return memory_address_length (XEXP (recog_data.operand[i], 0));
13333 break;
13335 return 0;
13338 /* Return the maximum number of instructions a cpu can issue. */
13340 static int
13341 ix86_issue_rate (void)
13343 switch (ix86_tune)
13345 case PROCESSOR_PENTIUM:
13346 case PROCESSOR_K6:
13347 return 2;
13349 case PROCESSOR_PENTIUMPRO:
13350 case PROCESSOR_PENTIUM4:
13351 case PROCESSOR_ATHLON:
13352 case PROCESSOR_K8:
13353 case PROCESSOR_NOCONA:
13354 case PROCESSOR_GENERIC32:
13355 case PROCESSOR_GENERIC64:
13356 return 3;
13358 default:
13359 return 1;
13363 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13364 by DEP_INSN and nothing else set by DEP_INSN. */
13366 static int
13367 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13369 rtx set, set2;
13371 /* Simplify the test for uninteresting insns. */
13372 if (insn_type != TYPE_SETCC
13373 && insn_type != TYPE_ICMOV
13374 && insn_type != TYPE_FCMOV
13375 && insn_type != TYPE_IBR)
13376 return 0;
13378 if ((set = single_set (dep_insn)) != 0)
13380 set = SET_DEST (set);
13381 set2 = NULL_RTX;
13383 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13384 && XVECLEN (PATTERN (dep_insn), 0) == 2
13385 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13386 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13388 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13389 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13391 else
13392 return 0;
13394 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13395 return 0;
13397 /* This test is true if the dependent insn reads the flags but
13398 not any other potentially set register. */
13399 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13400 return 0;
13402 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13403 return 0;
13405 return 1;
13408 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13409 address with operands set by DEP_INSN. */
13411 static int
13412 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13414 rtx addr;
13416 if (insn_type == TYPE_LEA
13417 && TARGET_PENTIUM)
13419 addr = PATTERN (insn);
13421 if (GET_CODE (addr) == PARALLEL)
13422 addr = XVECEXP (addr, 0, 0);
13424 gcc_assert (GET_CODE (addr) == SET);
13426 addr = SET_SRC (addr);
13428 else
13430 int i;
13431 extract_insn_cached (insn);
13432 for (i = recog_data.n_operands - 1; i >= 0; --i)
13433 if (GET_CODE (recog_data.operand[i]) == MEM)
13435 addr = XEXP (recog_data.operand[i], 0);
13436 goto found;
13438 return 0;
13439 found:;
13442 return modified_in_p (addr, dep_insn);
13445 static int
13446 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13448 enum attr_type insn_type, dep_insn_type;
13449 enum attr_memory memory;
13450 rtx set, set2;
13451 int dep_insn_code_number;
13453 /* Anti and output dependencies have zero cost on all CPUs. */
13454 if (REG_NOTE_KIND (link) != 0)
13455 return 0;
13457 dep_insn_code_number = recog_memoized (dep_insn);
13459 /* If we can't recognize the insns, we can't really do anything. */
13460 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13461 return cost;
13463 insn_type = get_attr_type (insn);
13464 dep_insn_type = get_attr_type (dep_insn);
13466 switch (ix86_tune)
13468 case PROCESSOR_PENTIUM:
13469 /* Address Generation Interlock adds a cycle of latency. */
13470 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13471 cost += 1;
13473 /* ??? Compares pair with jump/setcc. */
13474 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13475 cost = 0;
13477 /* Floating point stores require the value to be ready one cycle earlier. */
13478 if (insn_type == TYPE_FMOV
13479 && get_attr_memory (insn) == MEMORY_STORE
13480 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13481 cost += 1;
13482 break;
13484 case PROCESSOR_PENTIUMPRO:
13485 memory = get_attr_memory (insn);
13487 /* INT->FP conversion is expensive. */
13488 if (get_attr_fp_int_src (dep_insn))
13489 cost += 5;
13491 /* There is one extra cycle of latency between an FP op and a store. */
13492 if (insn_type == TYPE_FMOV
13493 && (set = single_set (dep_insn)) != NULL_RTX
13494 && (set2 = single_set (insn)) != NULL_RTX
13495 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13496 && GET_CODE (SET_DEST (set2)) == MEM)
13497 cost += 1;
13499 /* Show the ability of the reorder buffer to hide the latency of a load by
13500 executing it in parallel with the previous instruction when the previous
13501 instruction is not needed to compute the address. */
13502 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13503 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13505 /* Claim that moves take one cycle, as the core can issue one load at a
13506 time and the next load can start a cycle later. */
13507 if (dep_insn_type == TYPE_IMOV
13508 || dep_insn_type == TYPE_FMOV)
13509 cost = 1;
13510 else if (cost > 1)
13511 cost--;
13513 break;
13515 case PROCESSOR_K6:
13516 memory = get_attr_memory (insn);
13518 /* The esp dependency is resolved before the instruction is really
13519 finished. */
13520 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13521 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13522 return 1;
13524 /* INT->FP conversion is expensive. */
13525 if (get_attr_fp_int_src (dep_insn))
13526 cost += 5;
13528 /* Show the ability of the reorder buffer to hide the latency of a load by
13529 executing it in parallel with the previous instruction when the previous
13530 instruction is not needed to compute the address. */
13531 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13532 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13534 /* Claim that moves take one cycle, as the core can issue one load at a
13535 time and the next load can start a cycle later. */
13536 if (dep_insn_type == TYPE_IMOV
13537 || dep_insn_type == TYPE_FMOV)
13538 cost = 1;
13539 else if (cost > 2)
13540 cost -= 2;
13541 else
13542 cost = 1;
13544 break;
13546 case PROCESSOR_ATHLON:
13547 case PROCESSOR_K8:
13548 case PROCESSOR_GENERIC32:
13549 case PROCESSOR_GENERIC64:
13550 memory = get_attr_memory (insn);
13552 /* Show the ability of the reorder buffer to hide the latency of a load by
13553 executing it in parallel with the previous instruction when the previous
13554 instruction is not needed to compute the address. */
13555 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13556 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13558 enum attr_unit unit = get_attr_unit (insn);
13559 int loadcost = 3;
13561 /* Because of the difference between the length of the integer and
13562 floating-point unit pipeline preparation stages, the memory operands
13563 for floating point are cheaper.
13565 ??? For Athlon the difference is most probably 2. */
13566 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13567 loadcost = 3;
13568 else
13569 loadcost = TARGET_ATHLON ? 2 : 0;
13571 if (cost >= loadcost)
13572 cost -= loadcost;
13573 else
13574 cost = 0;
13577 default:
13578 break;
13581 return cost;
13584 /* How many alternative schedules to try. This should be as wide as the
13585 scheduling freedom in the DFA, but no wider. Making this value too
13586 large results in extra work for the scheduler. */
13588 static int
13589 ia32_multipass_dfa_lookahead (void)
13591 if (ix86_tune == PROCESSOR_PENTIUM)
13592 return 2;
13594 if (ix86_tune == PROCESSOR_PENTIUMPRO
13595 || ix86_tune == PROCESSOR_K6)
13596 return 1;
13598 else
13599 return 0;
13603 /* Compute the alignment given to a constant that is being placed in memory.
13604 EXP is the constant and ALIGN is the alignment that the object would
13605 ordinarily have.
13606 The value of this function is used instead of that alignment to align
13607 the object. */
13609 int
13610 ix86_constant_alignment (tree exp, int align)
13612 if (TREE_CODE (exp) == REAL_CST)
13614 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13615 return 64;
13616 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13617 return 128;
13619 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13620 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13621 return BITS_PER_WORD;
13623 return align;
13626 /* Compute the alignment for a static variable.
13627 TYPE is the data type, and ALIGN is the alignment that
13628 the object would ordinarily have. The value of this function is used
13629 instead of that alignment to align the object. */
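/* For example, when not optimizing for size an aggregate of 32 bytes or more
   is given at least 256-bit alignment, and on x86-64 an aggregate of 16 bytes
   or more gets at least the 128-bit alignment the ABI requires for such
   arrays.  */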
13631 int
13632 ix86_data_alignment (tree type, int align)
13634 int max_align = optimize_size ? BITS_PER_WORD : 256;
13636 if (AGGREGATE_TYPE_P (type)
13637 && TYPE_SIZE (type)
13638 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13639 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13640 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13641 && align < max_align)
13642 align = max_align;
13644 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
13645 to a 16-byte boundary. */
13646 if (TARGET_64BIT)
13648 if (AGGREGATE_TYPE_P (type)
13649 && TYPE_SIZE (type)
13650 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13651 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13652 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13653 return 128;
13656 if (TREE_CODE (type) == ARRAY_TYPE)
13658 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13659 return 64;
13660 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13661 return 128;
13663 else if (TREE_CODE (type) == COMPLEX_TYPE)
13666 if (TYPE_MODE (type) == DCmode && align < 64)
13667 return 64;
13668 if (TYPE_MODE (type) == XCmode && align < 128)
13669 return 128;
13671 else if ((TREE_CODE (type) == RECORD_TYPE
13672 || TREE_CODE (type) == UNION_TYPE
13673 || TREE_CODE (type) == QUAL_UNION_TYPE)
13674 && TYPE_FIELDS (type))
13676 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13677 return 64;
13678 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13679 return 128;
13681 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13682 || TREE_CODE (type) == INTEGER_TYPE)
13684 if (TYPE_MODE (type) == DFmode && align < 64)
13685 return 64;
13686 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13687 return 128;
13690 return align;
13693 /* Compute the alignment for a local variable.
13694 TYPE is the data type, and ALIGN is the alignment that
13695 the object would ordinarily have. The value of this macro is used
13696 instead of that alignment to align the object. */
13698 int
13699 ix86_local_alignment (tree type, int align)
13701 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
13702 to a 16-byte boundary. */
13703 if (TARGET_64BIT)
13705 if (AGGREGATE_TYPE_P (type)
13706 && TYPE_SIZE (type)
13707 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13708 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13709 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13710 return 128;
13712 if (TREE_CODE (type) == ARRAY_TYPE)
13714 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13715 return 64;
13716 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13717 return 128;
13719 else if (TREE_CODE (type) == COMPLEX_TYPE)
13721 if (TYPE_MODE (type) == DCmode && align < 64)
13722 return 64;
13723 if (TYPE_MODE (type) == XCmode && align < 128)
13724 return 128;
13726 else if ((TREE_CODE (type) == RECORD_TYPE
13727 || TREE_CODE (type) == UNION_TYPE
13728 || TREE_CODE (type) == QUAL_UNION_TYPE)
13729 && TYPE_FIELDS (type))
13731 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13732 return 64;
13733 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13734 return 128;
13736 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13737 || TREE_CODE (type) == INTEGER_TYPE)
13740 if (TYPE_MODE (type) == DFmode && align < 64)
13741 return 64;
13742 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13743 return 128;
13745 return align;
13748 /* Emit RTL insns to initialize the variable parts of a trampoline.
13749 FNADDR is an RTX for the address of the function's pure code.
13750 CXT is an RTX for the static chain value for the function. */
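/* The byte sequences emitted below correspond to the following templates.
   For !TARGET_64BIT (the static chain lives in %ecx):
	b9 <cxt32>	movl   $CXT, %ecx
	e9 <rel32>	jmp    FNADDR
   For TARGET_64BIT (the static chain lives in %r10):
	41 bb <imm32>	movl   $FNADDR, %r11d	(when FNADDR zero-extends)
     or 49 bb <imm64>	movabs $FNADDR, %r11
	49 ba <imm64>	movabs $CXT, %r10
	49 ff e3	jmp    *%r11  */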
13751 void
13752 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13754 if (!TARGET_64BIT)
13756 /* Compute offset from the end of the jmp to the target function. */
13757 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13758 plus_constant (tramp, 10),
13759 NULL_RTX, 1, OPTAB_DIRECT);
13760 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13761 gen_int_mode (0xb9, QImode));
13762 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13763 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13764 gen_int_mode (0xe9, QImode));
13765 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13767 else
13769 int offset = 0;
13770 /* Try to load the address using the shorter movl instead of movabs.
13771 We may want to support movq for kernel mode, but the kernel does not
13772 use trampolines at the moment. */
13773 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13775 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13776 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13777 gen_int_mode (0xbb41, HImode));
13778 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13779 gen_lowpart (SImode, fnaddr));
13780 offset += 6;
13782 else
13784 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13785 gen_int_mode (0xbb49, HImode));
13786 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13787 fnaddr);
13788 offset += 10;
13790 /* Load the static chain into r10 using movabs. */
13791 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13792 gen_int_mode (0xba49, HImode));
13793 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13794 cxt);
13795 offset += 10;
13796 /* Jump to r11. */
13797 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13798 gen_int_mode (0xff49, HImode));
13799 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13800 gen_int_mode (0xe3, QImode));
13801 offset += 3;
13802 gcc_assert (offset <= TRAMPOLINE_SIZE);
13805 #ifdef ENABLE_EXECUTE_STACK
13806 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13807 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13808 #endif
13811 /* Codes for all the SSE/MMX builtins. */
13812 enum ix86_builtins
13814 IX86_BUILTIN_ADDPS,
13815 IX86_BUILTIN_ADDSS,
13816 IX86_BUILTIN_DIVPS,
13817 IX86_BUILTIN_DIVSS,
13818 IX86_BUILTIN_MULPS,
13819 IX86_BUILTIN_MULSS,
13820 IX86_BUILTIN_SUBPS,
13821 IX86_BUILTIN_SUBSS,
13823 IX86_BUILTIN_CMPEQPS,
13824 IX86_BUILTIN_CMPLTPS,
13825 IX86_BUILTIN_CMPLEPS,
13826 IX86_BUILTIN_CMPGTPS,
13827 IX86_BUILTIN_CMPGEPS,
13828 IX86_BUILTIN_CMPNEQPS,
13829 IX86_BUILTIN_CMPNLTPS,
13830 IX86_BUILTIN_CMPNLEPS,
13831 IX86_BUILTIN_CMPNGTPS,
13832 IX86_BUILTIN_CMPNGEPS,
13833 IX86_BUILTIN_CMPORDPS,
13834 IX86_BUILTIN_CMPUNORDPS,
13835 IX86_BUILTIN_CMPEQSS,
13836 IX86_BUILTIN_CMPLTSS,
13837 IX86_BUILTIN_CMPLESS,
13838 IX86_BUILTIN_CMPNEQSS,
13839 IX86_BUILTIN_CMPNLTSS,
13840 IX86_BUILTIN_CMPNLESS,
13841 IX86_BUILTIN_CMPNGTSS,
13842 IX86_BUILTIN_CMPNGESS,
13843 IX86_BUILTIN_CMPORDSS,
13844 IX86_BUILTIN_CMPUNORDSS,
13846 IX86_BUILTIN_COMIEQSS,
13847 IX86_BUILTIN_COMILTSS,
13848 IX86_BUILTIN_COMILESS,
13849 IX86_BUILTIN_COMIGTSS,
13850 IX86_BUILTIN_COMIGESS,
13851 IX86_BUILTIN_COMINEQSS,
13852 IX86_BUILTIN_UCOMIEQSS,
13853 IX86_BUILTIN_UCOMILTSS,
13854 IX86_BUILTIN_UCOMILESS,
13855 IX86_BUILTIN_UCOMIGTSS,
13856 IX86_BUILTIN_UCOMIGESS,
13857 IX86_BUILTIN_UCOMINEQSS,
13859 IX86_BUILTIN_CVTPI2PS,
13860 IX86_BUILTIN_CVTPS2PI,
13861 IX86_BUILTIN_CVTSI2SS,
13862 IX86_BUILTIN_CVTSI642SS,
13863 IX86_BUILTIN_CVTSS2SI,
13864 IX86_BUILTIN_CVTSS2SI64,
13865 IX86_BUILTIN_CVTTPS2PI,
13866 IX86_BUILTIN_CVTTSS2SI,
13867 IX86_BUILTIN_CVTTSS2SI64,
13869 IX86_BUILTIN_MAXPS,
13870 IX86_BUILTIN_MAXSS,
13871 IX86_BUILTIN_MINPS,
13872 IX86_BUILTIN_MINSS,
13874 IX86_BUILTIN_LOADUPS,
13875 IX86_BUILTIN_STOREUPS,
13876 IX86_BUILTIN_MOVSS,
13878 IX86_BUILTIN_MOVHLPS,
13879 IX86_BUILTIN_MOVLHPS,
13880 IX86_BUILTIN_LOADHPS,
13881 IX86_BUILTIN_LOADLPS,
13882 IX86_BUILTIN_STOREHPS,
13883 IX86_BUILTIN_STORELPS,
13885 IX86_BUILTIN_MASKMOVQ,
13886 IX86_BUILTIN_MOVMSKPS,
13887 IX86_BUILTIN_PMOVMSKB,
13889 IX86_BUILTIN_MOVNTPS,
13890 IX86_BUILTIN_MOVNTQ,
13892 IX86_BUILTIN_LOADDQU,
13893 IX86_BUILTIN_STOREDQU,
13895 IX86_BUILTIN_PACKSSWB,
13896 IX86_BUILTIN_PACKSSDW,
13897 IX86_BUILTIN_PACKUSWB,
13899 IX86_BUILTIN_PADDB,
13900 IX86_BUILTIN_PADDW,
13901 IX86_BUILTIN_PADDD,
13902 IX86_BUILTIN_PADDQ,
13903 IX86_BUILTIN_PADDSB,
13904 IX86_BUILTIN_PADDSW,
13905 IX86_BUILTIN_PADDUSB,
13906 IX86_BUILTIN_PADDUSW,
13907 IX86_BUILTIN_PSUBB,
13908 IX86_BUILTIN_PSUBW,
13909 IX86_BUILTIN_PSUBD,
13910 IX86_BUILTIN_PSUBQ,
13911 IX86_BUILTIN_PSUBSB,
13912 IX86_BUILTIN_PSUBSW,
13913 IX86_BUILTIN_PSUBUSB,
13914 IX86_BUILTIN_PSUBUSW,
13916 IX86_BUILTIN_PAND,
13917 IX86_BUILTIN_PANDN,
13918 IX86_BUILTIN_POR,
13919 IX86_BUILTIN_PXOR,
13921 IX86_BUILTIN_PAVGB,
13922 IX86_BUILTIN_PAVGW,
13924 IX86_BUILTIN_PCMPEQB,
13925 IX86_BUILTIN_PCMPEQW,
13926 IX86_BUILTIN_PCMPEQD,
13927 IX86_BUILTIN_PCMPGTB,
13928 IX86_BUILTIN_PCMPGTW,
13929 IX86_BUILTIN_PCMPGTD,
13931 IX86_BUILTIN_PMADDWD,
13933 IX86_BUILTIN_PMAXSW,
13934 IX86_BUILTIN_PMAXUB,
13935 IX86_BUILTIN_PMINSW,
13936 IX86_BUILTIN_PMINUB,
13938 IX86_BUILTIN_PMULHUW,
13939 IX86_BUILTIN_PMULHW,
13940 IX86_BUILTIN_PMULLW,
13942 IX86_BUILTIN_PSADBW,
13943 IX86_BUILTIN_PSHUFW,
13945 IX86_BUILTIN_PSLLW,
13946 IX86_BUILTIN_PSLLD,
13947 IX86_BUILTIN_PSLLQ,
13948 IX86_BUILTIN_PSRAW,
13949 IX86_BUILTIN_PSRAD,
13950 IX86_BUILTIN_PSRLW,
13951 IX86_BUILTIN_PSRLD,
13952 IX86_BUILTIN_PSRLQ,
13953 IX86_BUILTIN_PSLLWI,
13954 IX86_BUILTIN_PSLLDI,
13955 IX86_BUILTIN_PSLLQI,
13956 IX86_BUILTIN_PSRAWI,
13957 IX86_BUILTIN_PSRADI,
13958 IX86_BUILTIN_PSRLWI,
13959 IX86_BUILTIN_PSRLDI,
13960 IX86_BUILTIN_PSRLQI,
13962 IX86_BUILTIN_PUNPCKHBW,
13963 IX86_BUILTIN_PUNPCKHWD,
13964 IX86_BUILTIN_PUNPCKHDQ,
13965 IX86_BUILTIN_PUNPCKLBW,
13966 IX86_BUILTIN_PUNPCKLWD,
13967 IX86_BUILTIN_PUNPCKLDQ,
13969 IX86_BUILTIN_SHUFPS,
13971 IX86_BUILTIN_RCPPS,
13972 IX86_BUILTIN_RCPSS,
13973 IX86_BUILTIN_RSQRTPS,
13974 IX86_BUILTIN_RSQRTSS,
13975 IX86_BUILTIN_SQRTPS,
13976 IX86_BUILTIN_SQRTSS,
13978 IX86_BUILTIN_UNPCKHPS,
13979 IX86_BUILTIN_UNPCKLPS,
13981 IX86_BUILTIN_ANDPS,
13982 IX86_BUILTIN_ANDNPS,
13983 IX86_BUILTIN_ORPS,
13984 IX86_BUILTIN_XORPS,
13986 IX86_BUILTIN_EMMS,
13987 IX86_BUILTIN_LDMXCSR,
13988 IX86_BUILTIN_STMXCSR,
13989 IX86_BUILTIN_SFENCE,
13991 /* 3DNow! Original */
13992 IX86_BUILTIN_FEMMS,
13993 IX86_BUILTIN_PAVGUSB,
13994 IX86_BUILTIN_PF2ID,
13995 IX86_BUILTIN_PFACC,
13996 IX86_BUILTIN_PFADD,
13997 IX86_BUILTIN_PFCMPEQ,
13998 IX86_BUILTIN_PFCMPGE,
13999 IX86_BUILTIN_PFCMPGT,
14000 IX86_BUILTIN_PFMAX,
14001 IX86_BUILTIN_PFMIN,
14002 IX86_BUILTIN_PFMUL,
14003 IX86_BUILTIN_PFRCP,
14004 IX86_BUILTIN_PFRCPIT1,
14005 IX86_BUILTIN_PFRCPIT2,
14006 IX86_BUILTIN_PFRSQIT1,
14007 IX86_BUILTIN_PFRSQRT,
14008 IX86_BUILTIN_PFSUB,
14009 IX86_BUILTIN_PFSUBR,
14010 IX86_BUILTIN_PI2FD,
14011 IX86_BUILTIN_PMULHRW,
14013 /* 3DNow! Athlon Extensions */
14014 IX86_BUILTIN_PF2IW,
14015 IX86_BUILTIN_PFNACC,
14016 IX86_BUILTIN_PFPNACC,
14017 IX86_BUILTIN_PI2FW,
14018 IX86_BUILTIN_PSWAPDSI,
14019 IX86_BUILTIN_PSWAPDSF,
14021 /* SSE2 */
14022 IX86_BUILTIN_ADDPD,
14023 IX86_BUILTIN_ADDSD,
14024 IX86_BUILTIN_DIVPD,
14025 IX86_BUILTIN_DIVSD,
14026 IX86_BUILTIN_MULPD,
14027 IX86_BUILTIN_MULSD,
14028 IX86_BUILTIN_SUBPD,
14029 IX86_BUILTIN_SUBSD,
14031 IX86_BUILTIN_CMPEQPD,
14032 IX86_BUILTIN_CMPLTPD,
14033 IX86_BUILTIN_CMPLEPD,
14034 IX86_BUILTIN_CMPGTPD,
14035 IX86_BUILTIN_CMPGEPD,
14036 IX86_BUILTIN_CMPNEQPD,
14037 IX86_BUILTIN_CMPNLTPD,
14038 IX86_BUILTIN_CMPNLEPD,
14039 IX86_BUILTIN_CMPNGTPD,
14040 IX86_BUILTIN_CMPNGEPD,
14041 IX86_BUILTIN_CMPORDPD,
14042 IX86_BUILTIN_CMPUNORDPD,
14043 IX86_BUILTIN_CMPNEPD,
14044 IX86_BUILTIN_CMPEQSD,
14045 IX86_BUILTIN_CMPLTSD,
14046 IX86_BUILTIN_CMPLESD,
14047 IX86_BUILTIN_CMPNEQSD,
14048 IX86_BUILTIN_CMPNLTSD,
14049 IX86_BUILTIN_CMPNLESD,
14050 IX86_BUILTIN_CMPORDSD,
14051 IX86_BUILTIN_CMPUNORDSD,
14052 IX86_BUILTIN_CMPNESD,
14054 IX86_BUILTIN_COMIEQSD,
14055 IX86_BUILTIN_COMILTSD,
14056 IX86_BUILTIN_COMILESD,
14057 IX86_BUILTIN_COMIGTSD,
14058 IX86_BUILTIN_COMIGESD,
14059 IX86_BUILTIN_COMINEQSD,
14060 IX86_BUILTIN_UCOMIEQSD,
14061 IX86_BUILTIN_UCOMILTSD,
14062 IX86_BUILTIN_UCOMILESD,
14063 IX86_BUILTIN_UCOMIGTSD,
14064 IX86_BUILTIN_UCOMIGESD,
14065 IX86_BUILTIN_UCOMINEQSD,
14067 IX86_BUILTIN_MAXPD,
14068 IX86_BUILTIN_MAXSD,
14069 IX86_BUILTIN_MINPD,
14070 IX86_BUILTIN_MINSD,
14072 IX86_BUILTIN_ANDPD,
14073 IX86_BUILTIN_ANDNPD,
14074 IX86_BUILTIN_ORPD,
14075 IX86_BUILTIN_XORPD,
14077 IX86_BUILTIN_SQRTPD,
14078 IX86_BUILTIN_SQRTSD,
14080 IX86_BUILTIN_UNPCKHPD,
14081 IX86_BUILTIN_UNPCKLPD,
14083 IX86_BUILTIN_SHUFPD,
14085 IX86_BUILTIN_LOADUPD,
14086 IX86_BUILTIN_STOREUPD,
14087 IX86_BUILTIN_MOVSD,
14089 IX86_BUILTIN_LOADHPD,
14090 IX86_BUILTIN_LOADLPD,
14092 IX86_BUILTIN_CVTDQ2PD,
14093 IX86_BUILTIN_CVTDQ2PS,
14095 IX86_BUILTIN_CVTPD2DQ,
14096 IX86_BUILTIN_CVTPD2PI,
14097 IX86_BUILTIN_CVTPD2PS,
14098 IX86_BUILTIN_CVTTPD2DQ,
14099 IX86_BUILTIN_CVTTPD2PI,
14101 IX86_BUILTIN_CVTPI2PD,
14102 IX86_BUILTIN_CVTSI2SD,
14103 IX86_BUILTIN_CVTSI642SD,
14105 IX86_BUILTIN_CVTSD2SI,
14106 IX86_BUILTIN_CVTSD2SI64,
14107 IX86_BUILTIN_CVTSD2SS,
14108 IX86_BUILTIN_CVTSS2SD,
14109 IX86_BUILTIN_CVTTSD2SI,
14110 IX86_BUILTIN_CVTTSD2SI64,
14112 IX86_BUILTIN_CVTPS2DQ,
14113 IX86_BUILTIN_CVTPS2PD,
14114 IX86_BUILTIN_CVTTPS2DQ,
14116 IX86_BUILTIN_MOVNTI,
14117 IX86_BUILTIN_MOVNTPD,
14118 IX86_BUILTIN_MOVNTDQ,
14120 /* SSE2 MMX */
14121 IX86_BUILTIN_MASKMOVDQU,
14122 IX86_BUILTIN_MOVMSKPD,
14123 IX86_BUILTIN_PMOVMSKB128,
14125 IX86_BUILTIN_PACKSSWB128,
14126 IX86_BUILTIN_PACKSSDW128,
14127 IX86_BUILTIN_PACKUSWB128,
14129 IX86_BUILTIN_PADDB128,
14130 IX86_BUILTIN_PADDW128,
14131 IX86_BUILTIN_PADDD128,
14132 IX86_BUILTIN_PADDQ128,
14133 IX86_BUILTIN_PADDSB128,
14134 IX86_BUILTIN_PADDSW128,
14135 IX86_BUILTIN_PADDUSB128,
14136 IX86_BUILTIN_PADDUSW128,
14137 IX86_BUILTIN_PSUBB128,
14138 IX86_BUILTIN_PSUBW128,
14139 IX86_BUILTIN_PSUBD128,
14140 IX86_BUILTIN_PSUBQ128,
14141 IX86_BUILTIN_PSUBSB128,
14142 IX86_BUILTIN_PSUBSW128,
14143 IX86_BUILTIN_PSUBUSB128,
14144 IX86_BUILTIN_PSUBUSW128,
14146 IX86_BUILTIN_PAND128,
14147 IX86_BUILTIN_PANDN128,
14148 IX86_BUILTIN_POR128,
14149 IX86_BUILTIN_PXOR128,
14151 IX86_BUILTIN_PAVGB128,
14152 IX86_BUILTIN_PAVGW128,
14154 IX86_BUILTIN_PCMPEQB128,
14155 IX86_BUILTIN_PCMPEQW128,
14156 IX86_BUILTIN_PCMPEQD128,
14157 IX86_BUILTIN_PCMPGTB128,
14158 IX86_BUILTIN_PCMPGTW128,
14159 IX86_BUILTIN_PCMPGTD128,
14161 IX86_BUILTIN_PMADDWD128,
14163 IX86_BUILTIN_PMAXSW128,
14164 IX86_BUILTIN_PMAXUB128,
14165 IX86_BUILTIN_PMINSW128,
14166 IX86_BUILTIN_PMINUB128,
14168 IX86_BUILTIN_PMULUDQ,
14169 IX86_BUILTIN_PMULUDQ128,
14170 IX86_BUILTIN_PMULHUW128,
14171 IX86_BUILTIN_PMULHW128,
14172 IX86_BUILTIN_PMULLW128,
14174 IX86_BUILTIN_PSADBW128,
14175 IX86_BUILTIN_PSHUFHW,
14176 IX86_BUILTIN_PSHUFLW,
14177 IX86_BUILTIN_PSHUFD,
14179 IX86_BUILTIN_PSLLW128,
14180 IX86_BUILTIN_PSLLD128,
14181 IX86_BUILTIN_PSLLQ128,
14182 IX86_BUILTIN_PSRAW128,
14183 IX86_BUILTIN_PSRAD128,
14184 IX86_BUILTIN_PSRLW128,
14185 IX86_BUILTIN_PSRLD128,
14186 IX86_BUILTIN_PSRLQ128,
14187 IX86_BUILTIN_PSLLDQI128,
14188 IX86_BUILTIN_PSLLWI128,
14189 IX86_BUILTIN_PSLLDI128,
14190 IX86_BUILTIN_PSLLQI128,
14191 IX86_BUILTIN_PSRAWI128,
14192 IX86_BUILTIN_PSRADI128,
14193 IX86_BUILTIN_PSRLDQI128,
14194 IX86_BUILTIN_PSRLWI128,
14195 IX86_BUILTIN_PSRLDI128,
14196 IX86_BUILTIN_PSRLQI128,
14198 IX86_BUILTIN_PUNPCKHBW128,
14199 IX86_BUILTIN_PUNPCKHWD128,
14200 IX86_BUILTIN_PUNPCKHDQ128,
14201 IX86_BUILTIN_PUNPCKHQDQ128,
14202 IX86_BUILTIN_PUNPCKLBW128,
14203 IX86_BUILTIN_PUNPCKLWD128,
14204 IX86_BUILTIN_PUNPCKLDQ128,
14205 IX86_BUILTIN_PUNPCKLQDQ128,
14207 IX86_BUILTIN_CLFLUSH,
14208 IX86_BUILTIN_MFENCE,
14209 IX86_BUILTIN_LFENCE,
14211 /* Prescott New Instructions. */
14212 IX86_BUILTIN_ADDSUBPS,
14213 IX86_BUILTIN_HADDPS,
14214 IX86_BUILTIN_HSUBPS,
14215 IX86_BUILTIN_MOVSHDUP,
14216 IX86_BUILTIN_MOVSLDUP,
14217 IX86_BUILTIN_ADDSUBPD,
14218 IX86_BUILTIN_HADDPD,
14219 IX86_BUILTIN_HSUBPD,
14220 IX86_BUILTIN_LDDQU,
14222 IX86_BUILTIN_MONITOR,
14223 IX86_BUILTIN_MWAIT,
14225 IX86_BUILTIN_VEC_INIT_V2SI,
14226 IX86_BUILTIN_VEC_INIT_V4HI,
14227 IX86_BUILTIN_VEC_INIT_V8QI,
14228 IX86_BUILTIN_VEC_EXT_V2DF,
14229 IX86_BUILTIN_VEC_EXT_V2DI,
14230 IX86_BUILTIN_VEC_EXT_V4SF,
14231 IX86_BUILTIN_VEC_EXT_V4SI,
14232 IX86_BUILTIN_VEC_EXT_V8HI,
14233 IX86_BUILTIN_VEC_EXT_V2SI,
14234 IX86_BUILTIN_VEC_EXT_V4HI,
14235 IX86_BUILTIN_VEC_SET_V8HI,
14236 IX86_BUILTIN_VEC_SET_V4HI,
14238 IX86_BUILTIN_MAX
14241 #define def_builtin(MASK, NAME, TYPE, CODE) \
14242 do { \
14243 if ((MASK) & target_flags \
14244 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14245 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14246 NULL, NULL_TREE); \
14247 } while (0)
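/* For example, elsewhere in this file the unaligned SSE load builtin is
   registered with a call along the lines of
     def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat,
		  IX86_BUILTIN_LOADUPS);
   so the builtin is only made visible when SSE support is enabled.  */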
14249 /* Bits for builtin_description.flag. */
14251 /* Set when we don't support the comparison natively, and should swap the
14252 comparison operands in order to support it. */
14253 #define BUILTIN_DESC_SWAP_OPERANDS 1
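/* For example, there is no native GT comparison pattern for V4SF, so
   __builtin_ia32_cmpgtps is listed in bdesc_2arg below as the LT comparison
   with BUILTIN_DESC_SWAP_OPERANDS set; the expander swaps the two operands
   before emitting the compare.  */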
14255 struct builtin_description
14257 const unsigned int mask;
14258 const enum insn_code icode;
14259 const char *const name;
14260 const enum ix86_builtins code;
14261 const enum rtx_code comparison;
14262 const unsigned int flag;
14265 static const struct builtin_description bdesc_comi[] =
14267 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14268 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14269 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14270 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14271 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14272 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14273 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14274 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14275 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14276 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14277 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14278 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14279 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14280 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14281 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14282 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14283 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14284 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14285 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14286 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14287 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14288 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14289 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14290 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14293 static const struct builtin_description bdesc_2arg[] =
14295 /* SSE */
14296 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14297 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14298 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14299 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14300 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14301 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14302 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14303 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14305 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14306 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14307 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14308 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14309 BUILTIN_DESC_SWAP_OPERANDS },
14310 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14311 BUILTIN_DESC_SWAP_OPERANDS },
14312 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14313 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14314 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14315 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14316 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14317 BUILTIN_DESC_SWAP_OPERANDS },
14318 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14319 BUILTIN_DESC_SWAP_OPERANDS },
14320 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14321 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14322 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14323 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14324 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14325 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14326 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14327 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14328 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14329 BUILTIN_DESC_SWAP_OPERANDS },
14330 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14331 BUILTIN_DESC_SWAP_OPERANDS },
14332 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14334 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14335 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14336 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14337 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14339 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14340 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14341 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14342 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14344 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14345 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14346 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14347 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14348 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14350 /* MMX */
14351 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14352 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14353 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14354 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14355 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14356 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14357 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14358 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14360 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14361 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14362 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14363 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14364 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14365 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14366 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14367 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14369 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14370 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14371 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14373 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14374 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14375 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14376 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14378 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14379 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14381 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14382 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14383 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14384 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14385 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14386 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14388 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14389 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14390 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14391 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14393 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14394 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14395 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14396 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14397 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14398 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14400 /* Special. */
14401 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14402 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14403 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14405 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14406 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14407 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14409 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14410 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14411 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14412 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14413 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14414 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14416 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14417 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14418 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14419 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14420 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14421 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14423 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14424 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14425 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14426 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14428 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14429 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14431 /* SSE2 */
14432 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14433 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14434 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14435 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14436 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14437 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14438 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14439 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14441 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14442 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14443 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14444 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14445 BUILTIN_DESC_SWAP_OPERANDS },
14446 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14447 BUILTIN_DESC_SWAP_OPERANDS },
14448 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14449 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14450 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14451 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14452 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14453 BUILTIN_DESC_SWAP_OPERANDS },
14454 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14455 BUILTIN_DESC_SWAP_OPERANDS },
14456 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14457 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14458 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14459 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14460 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14461 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14462 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14463 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14464 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14466 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14467 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14468 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14469 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14471 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14472 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14473 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14474 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14476 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14477 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14478 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14480 /* SSE2 MMX */
14481 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14482 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14483 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14484 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14485 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14486 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14487 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14488 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14490 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14491 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14492 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14493 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14494 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14495 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14496 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14497 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14499 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14500 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14502 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14503 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14504 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14505 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14507 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14508 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14510 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14511 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14512 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14513 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14514 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14515 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14517 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14518 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14519 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14520 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14522 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14523 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14524 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14525 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14526 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14527 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14528 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14529 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14531 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14532 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14533 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14535 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14536 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14538 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14539 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14541 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14542 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14543 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14545 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14546 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14547 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14549 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14550 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14552 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14554 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14555 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14556 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14557 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14559 /* SSE3 MMX */
14560 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14561 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14562 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14563 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14564 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14565 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14568 static const struct builtin_description bdesc_1arg[] =
14570 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14571 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14573 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14574 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14575 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14577 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14578 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14579 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14580 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14581 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14582 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14584 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14585 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14587 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14589 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14590 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14592 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14594 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14595 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14598 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14600 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14601 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14602 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14603 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14605 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14606 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14607 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14609 /* SSE3 */
14610 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14611 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14614 static void
14615 ix86_init_builtins (void)
14617 if (TARGET_MMX)
14618 ix86_init_mmx_sse_builtins ();
14621 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14622 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14623 builtins. */
14624 static void
14625 ix86_init_mmx_sse_builtins (void)
14627 const struct builtin_description * d;
14628 size_t i;
14630 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14631 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14632 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14633 tree V2DI_type_node
14634 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14635 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14636 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14637 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14638 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14639 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14640 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14642 tree pchar_type_node = build_pointer_type (char_type_node);
14643 tree pcchar_type_node = build_pointer_type (
14644 build_type_variant (char_type_node, 1, 0));
14645 tree pfloat_type_node = build_pointer_type (float_type_node);
14646 tree pcfloat_type_node = build_pointer_type (
14647 build_type_variant (float_type_node, 1, 0));
14648 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14649 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14650 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14652 /* Comparisons. */
14653 tree int_ftype_v4sf_v4sf
14654 = build_function_type_list (integer_type_node,
14655 V4SF_type_node, V4SF_type_node, NULL_TREE);
14656 tree v4si_ftype_v4sf_v4sf
14657 = build_function_type_list (V4SI_type_node,
14658 V4SF_type_node, V4SF_type_node, NULL_TREE);
14659 /* MMX/SSE/integer conversions. */
14660 tree int_ftype_v4sf
14661 = build_function_type_list (integer_type_node,
14662 V4SF_type_node, NULL_TREE);
14663 tree int64_ftype_v4sf
14664 = build_function_type_list (long_long_integer_type_node,
14665 V4SF_type_node, NULL_TREE);
14666 tree int_ftype_v8qi
14667 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14668 tree v4sf_ftype_v4sf_int
14669 = build_function_type_list (V4SF_type_node,
14670 V4SF_type_node, integer_type_node, NULL_TREE);
14671 tree v4sf_ftype_v4sf_int64
14672 = build_function_type_list (V4SF_type_node,
14673 V4SF_type_node, long_long_integer_type_node,
14674 NULL_TREE);
14675 tree v4sf_ftype_v4sf_v2si
14676 = build_function_type_list (V4SF_type_node,
14677 V4SF_type_node, V2SI_type_node, NULL_TREE);
14679 /* Miscellaneous. */
14680 tree v8qi_ftype_v4hi_v4hi
14681 = build_function_type_list (V8QI_type_node,
14682 V4HI_type_node, V4HI_type_node, NULL_TREE);
14683 tree v4hi_ftype_v2si_v2si
14684 = build_function_type_list (V4HI_type_node,
14685 V2SI_type_node, V2SI_type_node, NULL_TREE);
14686 tree v4sf_ftype_v4sf_v4sf_int
14687 = build_function_type_list (V4SF_type_node,
14688 V4SF_type_node, V4SF_type_node,
14689 integer_type_node, NULL_TREE);
14690 tree v2si_ftype_v4hi_v4hi
14691 = build_function_type_list (V2SI_type_node,
14692 V4HI_type_node, V4HI_type_node, NULL_TREE);
14693 tree v4hi_ftype_v4hi_int
14694 = build_function_type_list (V4HI_type_node,
14695 V4HI_type_node, integer_type_node, NULL_TREE);
14696 tree v4hi_ftype_v4hi_di
14697 = build_function_type_list (V4HI_type_node,
14698 V4HI_type_node, long_long_unsigned_type_node,
14699 NULL_TREE);
14700 tree v2si_ftype_v2si_di
14701 = build_function_type_list (V2SI_type_node,
14702 V2SI_type_node, long_long_unsigned_type_node,
14703 NULL_TREE);
14704 tree void_ftype_void
14705 = build_function_type (void_type_node, void_list_node);
14706 tree void_ftype_unsigned
14707 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14708 tree void_ftype_unsigned_unsigned
14709 = build_function_type_list (void_type_node, unsigned_type_node,
14710 unsigned_type_node, NULL_TREE);
14711 tree void_ftype_pcvoid_unsigned_unsigned
14712 = build_function_type_list (void_type_node, const_ptr_type_node,
14713 unsigned_type_node, unsigned_type_node,
14714 NULL_TREE);
14715 tree unsigned_ftype_void
14716 = build_function_type (unsigned_type_node, void_list_node);
14717 tree v2si_ftype_v4sf
14718 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14719 /* Loads/stores. */
14720 tree void_ftype_v8qi_v8qi_pchar
14721 = build_function_type_list (void_type_node,
14722 V8QI_type_node, V8QI_type_node,
14723 pchar_type_node, NULL_TREE);
14724 tree v4sf_ftype_pcfloat
14725 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14726 /* @@@ the type is bogus */
14727 tree v4sf_ftype_v4sf_pv2si
14728 = build_function_type_list (V4SF_type_node,
14729 V4SF_type_node, pv2si_type_node, NULL_TREE);
14730 tree void_ftype_pv2si_v4sf
14731 = build_function_type_list (void_type_node,
14732 pv2si_type_node, V4SF_type_node, NULL_TREE);
14733 tree void_ftype_pfloat_v4sf
14734 = build_function_type_list (void_type_node,
14735 pfloat_type_node, V4SF_type_node, NULL_TREE);
14736 tree void_ftype_pdi_di
14737 = build_function_type_list (void_type_node,
14738 pdi_type_node, long_long_unsigned_type_node,
14739 NULL_TREE);
14740 tree void_ftype_pv2di_v2di
14741 = build_function_type_list (void_type_node,
14742 pv2di_type_node, V2DI_type_node, NULL_TREE);
14743 /* Normal vector unops. */
14744 tree v4sf_ftype_v4sf
14745 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14747 /* Normal vector binops. */
14748 tree v4sf_ftype_v4sf_v4sf
14749 = build_function_type_list (V4SF_type_node,
14750 V4SF_type_node, V4SF_type_node, NULL_TREE);
14751 tree v8qi_ftype_v8qi_v8qi
14752 = build_function_type_list (V8QI_type_node,
14753 V8QI_type_node, V8QI_type_node, NULL_TREE);
14754 tree v4hi_ftype_v4hi_v4hi
14755 = build_function_type_list (V4HI_type_node,
14756 V4HI_type_node, V4HI_type_node, NULL_TREE);
14757 tree v2si_ftype_v2si_v2si
14758 = build_function_type_list (V2SI_type_node,
14759 V2SI_type_node, V2SI_type_node, NULL_TREE);
14760 tree di_ftype_di_di
14761 = build_function_type_list (long_long_unsigned_type_node,
14762 long_long_unsigned_type_node,
14763 long_long_unsigned_type_node, NULL_TREE);
14765 tree v2si_ftype_v2sf
14766 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14767 tree v2sf_ftype_v2si
14768 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14769 tree v2si_ftype_v2si
14770 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14771 tree v2sf_ftype_v2sf
14772 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14773 tree v2sf_ftype_v2sf_v2sf
14774 = build_function_type_list (V2SF_type_node,
14775 V2SF_type_node, V2SF_type_node, NULL_TREE);
14776 tree v2si_ftype_v2sf_v2sf
14777 = build_function_type_list (V2SI_type_node,
14778 V2SF_type_node, V2SF_type_node, NULL_TREE);
14779 tree pint_type_node = build_pointer_type (integer_type_node);
14780 tree pdouble_type_node = build_pointer_type (double_type_node);
14781 tree pcdouble_type_node = build_pointer_type (
14782 build_type_variant (double_type_node, 1, 0));
14783 tree int_ftype_v2df_v2df
14784 = build_function_type_list (integer_type_node,
14785 V2DF_type_node, V2DF_type_node, NULL_TREE);
14787 tree void_ftype_pcvoid
14788 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14789 tree v4sf_ftype_v4si
14790 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14791 tree v4si_ftype_v4sf
14792 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14793 tree v2df_ftype_v4si
14794 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14795 tree v4si_ftype_v2df
14796 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14797 tree v2si_ftype_v2df
14798 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14799 tree v4sf_ftype_v2df
14800 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14801 tree v2df_ftype_v2si
14802 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14803 tree v2df_ftype_v4sf
14804 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14805 tree int_ftype_v2df
14806 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14807 tree int64_ftype_v2df
14808 = build_function_type_list (long_long_integer_type_node,
14809 V2DF_type_node, NULL_TREE);
14810 tree v2df_ftype_v2df_int
14811 = build_function_type_list (V2DF_type_node,
14812 V2DF_type_node, integer_type_node, NULL_TREE);
14813 tree v2df_ftype_v2df_int64
14814 = build_function_type_list (V2DF_type_node,
14815 V2DF_type_node, long_long_integer_type_node,
14816 NULL_TREE);
14817 tree v4sf_ftype_v4sf_v2df
14818 = build_function_type_list (V4SF_type_node,
14819 V4SF_type_node, V2DF_type_node, NULL_TREE);
14820 tree v2df_ftype_v2df_v4sf
14821 = build_function_type_list (V2DF_type_node,
14822 V2DF_type_node, V4SF_type_node, NULL_TREE);
14823 tree v2df_ftype_v2df_v2df_int
14824 = build_function_type_list (V2DF_type_node,
14825 V2DF_type_node, V2DF_type_node,
14826 integer_type_node,
14827 NULL_TREE);
14828 tree v2df_ftype_v2df_pcdouble
14829 = build_function_type_list (V2DF_type_node,
14830 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14831 tree void_ftype_pdouble_v2df
14832 = build_function_type_list (void_type_node,
14833 pdouble_type_node, V2DF_type_node, NULL_TREE);
14834 tree void_ftype_pint_int
14835 = build_function_type_list (void_type_node,
14836 pint_type_node, integer_type_node, NULL_TREE);
14837 tree void_ftype_v16qi_v16qi_pchar
14838 = build_function_type_list (void_type_node,
14839 V16QI_type_node, V16QI_type_node,
14840 pchar_type_node, NULL_TREE);
14841 tree v2df_ftype_pcdouble
14842 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14843 tree v2df_ftype_v2df_v2df
14844 = build_function_type_list (V2DF_type_node,
14845 V2DF_type_node, V2DF_type_node, NULL_TREE);
14846 tree v16qi_ftype_v16qi_v16qi
14847 = build_function_type_list (V16QI_type_node,
14848 V16QI_type_node, V16QI_type_node, NULL_TREE);
14849 tree v8hi_ftype_v8hi_v8hi
14850 = build_function_type_list (V8HI_type_node,
14851 V8HI_type_node, V8HI_type_node, NULL_TREE);
14852 tree v4si_ftype_v4si_v4si
14853 = build_function_type_list (V4SI_type_node,
14854 V4SI_type_node, V4SI_type_node, NULL_TREE);
14855 tree v2di_ftype_v2di_v2di
14856 = build_function_type_list (V2DI_type_node,
14857 V2DI_type_node, V2DI_type_node, NULL_TREE);
14858 tree v2di_ftype_v2df_v2df
14859 = build_function_type_list (V2DI_type_node,
14860 V2DF_type_node, V2DF_type_node, NULL_TREE);
14861 tree v2df_ftype_v2df
14862 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14863 tree v2di_ftype_v2di_int
14864 = build_function_type_list (V2DI_type_node,
14865 V2DI_type_node, integer_type_node, NULL_TREE);
14866 tree v4si_ftype_v4si_int
14867 = build_function_type_list (V4SI_type_node,
14868 V4SI_type_node, integer_type_node, NULL_TREE);
14869 tree v8hi_ftype_v8hi_int
14870 = build_function_type_list (V8HI_type_node,
14871 V8HI_type_node, integer_type_node, NULL_TREE);
14872 tree v8hi_ftype_v8hi_v2di
14873 = build_function_type_list (V8HI_type_node,
14874 V8HI_type_node, V2DI_type_node, NULL_TREE);
14875 tree v4si_ftype_v4si_v2di
14876 = build_function_type_list (V4SI_type_node,
14877 V4SI_type_node, V2DI_type_node, NULL_TREE);
14878 tree v4si_ftype_v8hi_v8hi
14879 = build_function_type_list (V4SI_type_node,
14880 V8HI_type_node, V8HI_type_node, NULL_TREE);
14881 tree di_ftype_v8qi_v8qi
14882 = build_function_type_list (long_long_unsigned_type_node,
14883 V8QI_type_node, V8QI_type_node, NULL_TREE);
14884 tree di_ftype_v2si_v2si
14885 = build_function_type_list (long_long_unsigned_type_node,
14886 V2SI_type_node, V2SI_type_node, NULL_TREE);
14887 tree v2di_ftype_v16qi_v16qi
14888 = build_function_type_list (V2DI_type_node,
14889 V16QI_type_node, V16QI_type_node, NULL_TREE);
14890 tree v2di_ftype_v4si_v4si
14891 = build_function_type_list (V2DI_type_node,
14892 V4SI_type_node, V4SI_type_node, NULL_TREE);
14893 tree int_ftype_v16qi
14894 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14895 tree v16qi_ftype_pcchar
14896 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14897 tree void_ftype_pchar_v16qi
14898 = build_function_type_list (void_type_node,
14899 pchar_type_node, V16QI_type_node, NULL_TREE);
14901 tree float80_type;
14902 tree float128_type;
14903 tree ftype;
14905 /* The __float80 type. */
14906 if (TYPE_MODE (long_double_type_node) == XFmode)
14907 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14908 "__float80");
14909 else
14911 /* The __float80 type. */
14912 float80_type = make_node (REAL_TYPE);
14913 TYPE_PRECISION (float80_type) = 80;
14914 layout_type (float80_type);
14915 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14918 if (TARGET_64BIT)
14920 float128_type = make_node (REAL_TYPE);
14921 TYPE_PRECISION (float128_type) = 128;
14922 layout_type (float128_type);
14923 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
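/* Illustrative user-level sketch (not from the original source): once the
   registrations above run, a translation unit can name the types directly,
   e.g.

       __float80  e;      -- 80-bit extended precision, all x86 targets
       __float128 q;      -- 128-bit type, 64-bit targets only

   Literal syntax and library support for these types are assumptions here;
   this file only registers the type nodes.  */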
14926 /* Add all builtins that are more or less simple operations on two
14927 operands. */
14928 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14930 /* Use one of the operands; the target can have a different mode for
14931 mask-generating compares. */
14932 enum machine_mode mode;
14933 tree type;
14935 if (d->name == 0)
14936 continue;
14937 mode = insn_data[d->icode].operand[1].mode;
14939 switch (mode)
14941 case V16QImode:
14942 type = v16qi_ftype_v16qi_v16qi;
14943 break;
14944 case V8HImode:
14945 type = v8hi_ftype_v8hi_v8hi;
14946 break;
14947 case V4SImode:
14948 type = v4si_ftype_v4si_v4si;
14949 break;
14950 case V2DImode:
14951 type = v2di_ftype_v2di_v2di;
14952 break;
14953 case V2DFmode:
14954 type = v2df_ftype_v2df_v2df;
14955 break;
14956 case V4SFmode:
14957 type = v4sf_ftype_v4sf_v4sf;
14958 break;
14959 case V8QImode:
14960 type = v8qi_ftype_v8qi_v8qi;
14961 break;
14962 case V4HImode:
14963 type = v4hi_ftype_v4hi_v4hi;
14964 break;
14965 case V2SImode:
14966 type = v2si_ftype_v2si_v2si;
14967 break;
14968 case DImode:
14969 type = di_ftype_di_di;
14970 break;
14972 default:
14973 gcc_unreachable ();
14976 /* Override for comparisons. */
14977 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14978 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
14979 type = v4si_ftype_v4sf_v4sf;
14981 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
14982 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14983 type = v2di_ftype_v2df_v2df;
14985 def_builtin (d->mask, d->name, type, d->code);
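/* Worked example of the loop above (hedged, for illustration only): for an
   entry such as __builtin_ia32_addps the insn's operand 1 has V4SFmode, so
   the switch selects v4sf_ftype_v4sf_v4sf.  The mask compares return an
   integer mask instead, hence the override: __builtin_ia32_cmpeqps is
   registered as v4si_ftype_v4sf_v4sf even though <xmmintrin.h> is expected
   to cast the result back to __m128.  The exact bdesc_2arg entries are
   assumed, not shown in this excerpt.  */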
14988 /* Add the remaining MMX insns with somewhat more complicated types. */
14989 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
14990 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
14991 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
14992 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
14994 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
14995 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
14996 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
14998 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
14999 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15001 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15002 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15004 /* comi/ucomi insns. */
15005 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15006 if (d->mask == MASK_SSE2)
15007 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15008 else
15009 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15011 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15012 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15013 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15015 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15016 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15017 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15018 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15019 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15020 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15021 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15022 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15023 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15024 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15025 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15027 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15029 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15030 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15032 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15033 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15034 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15035 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15037 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15038 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15039 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15040 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15042 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15044 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15046 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15047 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15048 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15049 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15050 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15051 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15053 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15055 /* Original 3DNow! */
15056 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15057 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15058 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15059 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15060 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15061 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15062 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15063 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15064 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15065 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15066 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15067 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15068 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15069 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15070 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15071 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15072 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15073 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15074 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15075 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15077 /* 3DNow! extension as used in the Athlon CPU. */
15078 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15079 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15080 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15081 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15082 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15083 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15085 /* SSE2 */
15086 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15088 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15089 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15091 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15092 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15094 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15095 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15096 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15097 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15098 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15100 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15101 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15102 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15103 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15105 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15106 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15108 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15110 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15111 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15113 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15114 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15115 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15116 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15117 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15119 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15121 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15122 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15123 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15124 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15126 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15127 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15128 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15130 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15131 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15132 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15133 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15135 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15136 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15137 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15139 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15140 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15142 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15143 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15145 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15146 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15147 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15149 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15150 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15151 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15153 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15154 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15156 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15157 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15158 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15159 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15161 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15162 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15163 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15164 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15166 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15167 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15169 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15171 /* Prescott New Instructions. */
15172 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15173 void_ftype_pcvoid_unsigned_unsigned,
15174 IX86_BUILTIN_MONITOR);
15175 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15176 void_ftype_unsigned_unsigned,
15177 IX86_BUILTIN_MWAIT);
15178 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15179 v4sf_ftype_v4sf,
15180 IX86_BUILTIN_MOVSHDUP);
15181 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15182 v4sf_ftype_v4sf,
15183 IX86_BUILTIN_MOVSLDUP);
15184 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15185 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15187 /* Access to the vec_init patterns. */
15188 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15189 integer_type_node, NULL_TREE);
15190 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15191 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15193 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15194 short_integer_type_node,
15195 short_integer_type_node,
15196 short_integer_type_node, NULL_TREE);
15197 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15198 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15200 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15201 char_type_node, char_type_node,
15202 char_type_node, char_type_node,
15203 char_type_node, char_type_node,
15204 char_type_node, NULL_TREE);
15205 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15206 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15208 /* Access to the vec_extract patterns. */
15209 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15210 integer_type_node, NULL_TREE);
15211 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15212 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15214 ftype = build_function_type_list (long_long_integer_type_node,
15215 V2DI_type_node, integer_type_node,
15216 NULL_TREE);
15217 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15218 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15220 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15221 integer_type_node, NULL_TREE);
15222 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15223 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15225 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15226 integer_type_node, NULL_TREE);
15227 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15228 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15230 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15231 integer_type_node, NULL_TREE);
15232 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15233 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15235 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15236 integer_type_node, NULL_TREE);
15237 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15238 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15240 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15241 integer_type_node, NULL_TREE);
15242 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15243 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15245 /* Access to the vec_set patterns. */
15246 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15247 intHI_type_node,
15248 integer_type_node, NULL_TREE);
15249 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15250 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15252 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15253 intHI_type_node,
15254 integer_type_node, NULL_TREE);
15255 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15256 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15259 /* Errors in the source file can cause expand_expr to return const0_rtx
15260 where we expect a vector. To avoid crashing, use one of the vector
15261 clear instructions. */
15262 static rtx
15263 safe_vector_operand (rtx x, enum machine_mode mode)
15265 if (x == const0_rtx)
15266 x = CONST0_RTX (mode);
15267 return x;
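/* A minimal sketch of the distinction being papered over (illustrative RTL
   only): const0_rtx is the shared (const_int 0), while CONST0_RTX (V4SFmode)
   is a zero CONST_VECTOR of the requested mode, so the vector predicates
   used by the expanders below still match after a front-end error.  */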
15270 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15272 static rtx
15273 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15275 rtx pat, xops[3];
15276 tree arg0 = TREE_VALUE (arglist);
15277 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15278 rtx op0 = expand_normal (arg0);
15279 rtx op1 = expand_normal (arg1);
15280 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15281 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15282 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15284 if (VECTOR_MODE_P (mode0))
15285 op0 = safe_vector_operand (op0, mode0);
15286 if (VECTOR_MODE_P (mode1))
15287 op1 = safe_vector_operand (op1, mode1);
15289 if (optimize || !target
15290 || GET_MODE (target) != tmode
15291 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15292 target = gen_reg_rtx (tmode);
15294 if (GET_MODE (op1) == SImode && mode1 == TImode)
15296 rtx x = gen_reg_rtx (V4SImode);
15297 emit_insn (gen_sse2_loadd (x, op1));
15298 op1 = gen_lowpart (TImode, x);
15301 /* The insn must want input operands in the same modes as the
15302 result. */
15303 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15304 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15306 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15307 op0 = copy_to_mode_reg (mode0, op0);
15308 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15309 op1 = copy_to_mode_reg (mode1, op1);
15311 /* ??? Using ix86_fixup_binary_operands is problematic when
15312 we've got mismatched modes. Fake it. */
15314 xops[0] = target;
15315 xops[1] = op0;
15316 xops[2] = op1;
15318 if (tmode == mode0 && tmode == mode1)
15320 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15321 op0 = xops[1];
15322 op1 = xops[2];
15324 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15326 op0 = force_reg (mode0, op0);
15327 op1 = force_reg (mode1, op1);
15328 target = gen_reg_rtx (tmode);
15331 pat = GEN_FCN (icode) (target, op0, op1);
15332 if (! pat)
15333 return 0;
15334 emit_insn (pat);
15335 return target;
15338 /* Subroutine of ix86_expand_builtin to take care of stores. */
15340 static rtx
15341 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15343 rtx pat;
15344 tree arg0 = TREE_VALUE (arglist);
15345 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15346 rtx op0 = expand_normal (arg0);
15347 rtx op1 = expand_normal (arg1);
15348 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15349 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15351 if (VECTOR_MODE_P (mode1))
15352 op1 = safe_vector_operand (op1, mode1);
15354 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15355 op1 = copy_to_mode_reg (mode1, op1);
15357 pat = GEN_FCN (icode) (op0, op1);
15358 if (pat)
15359 emit_insn (pat);
15360 return 0;
15363 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15365 static rtx
15366 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15367 rtx target, int do_load)
15369 rtx pat;
15370 tree arg0 = TREE_VALUE (arglist);
15371 rtx op0 = expand_normal (arg0);
15372 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15373 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15375 if (optimize || !target
15376 || GET_MODE (target) != tmode
15377 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15378 target = gen_reg_rtx (tmode);
15379 if (do_load)
15380 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15381 else
15383 if (VECTOR_MODE_P (mode0))
15384 op0 = safe_vector_operand (op0, mode0);
15386 if ((optimize && !register_operand (op0, mode0))
15387 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15388 op0 = copy_to_mode_reg (mode0, op0);
15391 pat = GEN_FCN (icode) (target, op0);
15392 if (! pat)
15393 return 0;
15394 emit_insn (pat);
15395 return target;
15398 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15399 sqrtss, rsqrtss, rcpss. */
15401 static rtx
15402 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15404 rtx pat;
15405 tree arg0 = TREE_VALUE (arglist);
15406 rtx op1, op0 = expand_normal (arg0);
15407 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15408 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15410 if (optimize || !target
15411 || GET_MODE (target) != tmode
15412 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15413 target = gen_reg_rtx (tmode);
15415 if (VECTOR_MODE_P (mode0))
15416 op0 = safe_vector_operand (op0, mode0);
15418 if ((optimize && !register_operand (op0, mode0))
15419 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15420 op0 = copy_to_mode_reg (mode0, op0);
15422 op1 = op0;
15423 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15424 op1 = copy_to_mode_reg (mode0, op1);
15426 pat = GEN_FCN (icode) (target, op0, op1);
15427 if (! pat)
15428 return 0;
15429 emit_insn (pat);
15430 return target;
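/* Why the operand is duplicated above (hedged explanation): the vm*
   patterns behind sqrtss, rsqrtss and rcpss take two V4SF inputs -- the
   value whose low element is operated on and the source of the untouched
   upper elements.  The builtin exposes only one argument, so op0 is reused
   for both; e.g. sqrtss then yields sqrt of element 0 of op0 with elements
   1..3 copied from op0 as well.  */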
15433 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15435 static rtx
15436 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15437 rtx target)
15439 rtx pat;
15440 tree arg0 = TREE_VALUE (arglist);
15441 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15442 rtx op0 = expand_normal (arg0);
15443 rtx op1 = expand_normal (arg1);
15444 rtx op2;
15445 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15446 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15447 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15448 enum rtx_code comparison = d->comparison;
15450 if (VECTOR_MODE_P (mode0))
15451 op0 = safe_vector_operand (op0, mode0);
15452 if (VECTOR_MODE_P (mode1))
15453 op1 = safe_vector_operand (op1, mode1);
15455 /* Swap operands if we have a comparison that isn't available in
15456 hardware. */
15457 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15459 rtx tmp = gen_reg_rtx (mode1);
15460 emit_move_insn (tmp, op1);
15461 op1 = op0;
15462 op0 = tmp;
15465 if (optimize || !target
15466 || GET_MODE (target) != tmode
15467 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15468 target = gen_reg_rtx (tmode);
15470 if ((optimize && !register_operand (op0, mode0))
15471 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15472 op0 = copy_to_mode_reg (mode0, op0);
15473 if ((optimize && !register_operand (op1, mode1))
15474 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15475 op1 = copy_to_mode_reg (mode1, op1);
15477 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15478 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15479 if (! pat)
15480 return 0;
15481 emit_insn (pat);
15482 return target;
15485 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15487 static rtx
15488 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15489 rtx target)
15491 rtx pat;
15492 tree arg0 = TREE_VALUE (arglist);
15493 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15494 rtx op0 = expand_normal (arg0);
15495 rtx op1 = expand_normal (arg1);
15496 rtx op2;
15497 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15498 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15499 enum rtx_code comparison = d->comparison;
15501 if (VECTOR_MODE_P (mode0))
15502 op0 = safe_vector_operand (op0, mode0);
15503 if (VECTOR_MODE_P (mode1))
15504 op1 = safe_vector_operand (op1, mode1);
15506 /* Swap operands if we have a comparison that isn't available in
15507 hardware. */
15508 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15510 rtx tmp = op1;
15511 op1 = op0;
15512 op0 = tmp;
15515 target = gen_reg_rtx (SImode);
15516 emit_move_insn (target, const0_rtx);
15517 target = gen_rtx_SUBREG (QImode, target, 0);
15519 if ((optimize && !register_operand (op0, mode0))
15520 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15521 op0 = copy_to_mode_reg (mode0, op0);
15522 if ((optimize && !register_operand (op1, mode1))
15523 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15524 op1 = copy_to_mode_reg (mode1, op1);
15526 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15527 pat = GEN_FCN (d->icode) (op0, op1);
15528 if (! pat)
15529 return 0;
15530 emit_insn (pat);
15531 emit_insn (gen_rtx_SET (VOIDmode,
15532 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15533 gen_rtx_fmt_ee (comparison, QImode,
15534 SET_DEST (pat),
15535 const0_rtx)));
15537 return SUBREG_REG (target);
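/* Hedged sketch of what this expands to: the comi/ucomi pattern compares
   the low elements and sets EFLAGS; the strict_low_part SET above then
   materializes the requested predicate with a setcc into the low byte of a
   fresh SImode pseudo, whose SImode value is returned.  The exact
   instruction selection (comiss vs. ucomiss, which setcc) depends on the
   descriptor entry and is not shown here.  */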
15540 /* Return the integer constant in ARG. Constrain it to be in the range
15541 of the subparts of VEC_TYPE; issue an error if not. */
15543 static int
15544 get_element_number (tree vec_type, tree arg)
15546 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15548 if (!host_integerp (arg, 1)
15549 || (elt = tree_low_cst (arg, 1), elt > max))
15551 error ("selector must be an integer constant in the range 0..%wi", max);
15552 return 0;
15555 return elt;
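/* Usage sketch (hypothetical source, for illustration): a V4SF vector has
   TYPE_VECTOR_SUBPARTS equal to 4, so

       __builtin_ia32_vec_ext_v4sf (x, 3);   -- OK, selects element 3
       __builtin_ia32_vec_ext_v4sf (x, 7);   -- out of range

   the second call triggers the error above and falls back to element 0.  */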
15558 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15559 ix86_expand_vector_init. We DO have language-level syntax for this, in
15560 the form of (type){ init-list }. Except that since we can't place emms
15561 instructions from inside the compiler, we can't allow the use of MMX
15562 registers unless the user explicitly asks for it. So we do *not* define
15563 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15564 we have builtins invoked by mmintrin.h that give us license to emit
15565 these sorts of instructions. */
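/* Hedged illustration of the wrapper in action: <mmintrin.h> is expected to
   route its initializers through these builtins, roughly

       __m64 v = (__m64) __builtin_ia32_vec_init_v4hi (w0, w1, w2, w3);

   so MMX registers only come into play once the user includes the
   intrinsics header.  The exact header definition is an assumption, not
   reproduced from this file.  */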
15567 static rtx
15568 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15570 enum machine_mode tmode = TYPE_MODE (type);
15571 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15572 int i, n_elt = GET_MODE_NUNITS (tmode);
15573 rtvec v = rtvec_alloc (n_elt);
15575 gcc_assert (VECTOR_MODE_P (tmode));
15577 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15579 rtx x = expand_normal (TREE_VALUE (arglist));
15580 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15583 gcc_assert (arglist == NULL);
15585 if (!target || !register_operand (target, tmode))
15586 target = gen_reg_rtx (tmode);
15588 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15589 return target;
15592 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15593 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15594 had a language-level syntax for referencing vector elements. */
15596 static rtx
15597 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15599 enum machine_mode tmode, mode0;
15600 tree arg0, arg1;
15601 int elt;
15602 rtx op0;
15604 arg0 = TREE_VALUE (arglist);
15605 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15607 op0 = expand_normal (arg0);
15608 elt = get_element_number (TREE_TYPE (arg0), arg1);
15610 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15611 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15612 gcc_assert (VECTOR_MODE_P (mode0));
15614 op0 = force_reg (mode0, op0);
15616 if (optimize || !target || !register_operand (target, tmode))
15617 target = gen_reg_rtx (tmode);
15619 ix86_expand_vector_extract (true, target, op0, elt);
15621 return target;
15624 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15625 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15626 a language-level syntax for referencing vector elements. */
15628 static rtx
15629 ix86_expand_vec_set_builtin (tree arglist)
15631 enum machine_mode tmode, mode1;
15632 tree arg0, arg1, arg2;
15633 int elt;
15634 rtx op0, op1;
15636 arg0 = TREE_VALUE (arglist);
15637 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15638 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15640 tmode = TYPE_MODE (TREE_TYPE (arg0));
15641 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15642 gcc_assert (VECTOR_MODE_P (tmode));
15644 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15645 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15646 elt = get_element_number (TREE_TYPE (arg0), arg2);
15648 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15649 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15651 op0 = force_reg (tmode, op0);
15652 op1 = force_reg (mode1, op1);
15654 ix86_expand_vector_set (true, op0, op1, elt);
15656 return op0;
15659 /* Expand an expression EXP that calls a built-in function,
15660 with result going to TARGET if that's convenient
15661 (and in mode MODE if that's convenient).
15662 SUBTARGET may be used as the target for computing one of EXP's operands.
15663 IGNORE is nonzero if the value is to be ignored. */
15665 static rtx
15666 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15667 enum machine_mode mode ATTRIBUTE_UNUSED,
15668 int ignore ATTRIBUTE_UNUSED)
15670 const struct builtin_description *d;
15671 size_t i;
15672 enum insn_code icode;
15673 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15674 tree arglist = TREE_OPERAND (exp, 1);
15675 tree arg0, arg1, arg2;
15676 rtx op0, op1, op2, pat;
15677 enum machine_mode tmode, mode0, mode1, mode2;
15678 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15680 switch (fcode)
15682 case IX86_BUILTIN_EMMS:
15683 emit_insn (gen_mmx_emms ());
15684 return 0;
15686 case IX86_BUILTIN_SFENCE:
15687 emit_insn (gen_sse_sfence ());
15688 return 0;
15690 case IX86_BUILTIN_MASKMOVQ:
15691 case IX86_BUILTIN_MASKMOVDQU:
15692 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15693 ? CODE_FOR_mmx_maskmovq
15694 : CODE_FOR_sse2_maskmovdqu);
15695 /* Note the arg order is different from the operand order. */
15696 arg1 = TREE_VALUE (arglist);
15697 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15698 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15699 op0 = expand_normal (arg0);
15700 op1 = expand_normal (arg1);
15701 op2 = expand_normal (arg2);
15702 mode0 = insn_data[icode].operand[0].mode;
15703 mode1 = insn_data[icode].operand[1].mode;
15704 mode2 = insn_data[icode].operand[2].mode;
15706 op0 = force_reg (Pmode, op0);
15707 op0 = gen_rtx_MEM (mode1, op0);
15709 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15710 op0 = copy_to_mode_reg (mode0, op0);
15711 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15712 op1 = copy_to_mode_reg (mode1, op1);
15713 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15714 op2 = copy_to_mode_reg (mode2, op2);
15715 pat = GEN_FCN (icode) (op0, op1, op2);
15716 if (! pat)
15717 return 0;
15718 emit_insn (pat);
15719 return 0;
15721 case IX86_BUILTIN_SQRTSS:
15722 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15723 case IX86_BUILTIN_RSQRTSS:
15724 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15725 case IX86_BUILTIN_RCPSS:
15726 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15728 case IX86_BUILTIN_LOADUPS:
15729 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15731 case IX86_BUILTIN_STOREUPS:
15732 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15734 case IX86_BUILTIN_LOADHPS:
15735 case IX86_BUILTIN_LOADLPS:
15736 case IX86_BUILTIN_LOADHPD:
15737 case IX86_BUILTIN_LOADLPD:
15738 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15739 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15740 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15741 : CODE_FOR_sse2_loadlpd);
15742 arg0 = TREE_VALUE (arglist);
15743 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15744 op0 = expand_normal (arg0);
15745 op1 = expand_normal (arg1);
15746 tmode = insn_data[icode].operand[0].mode;
15747 mode0 = insn_data[icode].operand[1].mode;
15748 mode1 = insn_data[icode].operand[2].mode;
15750 op0 = force_reg (mode0, op0);
15751 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15752 if (optimize || target == 0
15753 || GET_MODE (target) != tmode
15754 || !register_operand (target, tmode))
15755 target = gen_reg_rtx (tmode);
15756 pat = GEN_FCN (icode) (target, op0, op1);
15757 if (! pat)
15758 return 0;
15759 emit_insn (pat);
15760 return target;
15762 case IX86_BUILTIN_STOREHPS:
15763 case IX86_BUILTIN_STORELPS:
15764 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15765 : CODE_FOR_sse_storelps);
15766 arg0 = TREE_VALUE (arglist);
15767 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15768 op0 = expand_normal (arg0);
15769 op1 = expand_normal (arg1);
15770 mode0 = insn_data[icode].operand[0].mode;
15771 mode1 = insn_data[icode].operand[1].mode;
15773 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15774 op1 = force_reg (mode1, op1);
15776 pat = GEN_FCN (icode) (op0, op1);
15777 if (! pat)
15778 return 0;
15779 emit_insn (pat);
15780 return const0_rtx;
15782 case IX86_BUILTIN_MOVNTPS:
15783 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15784 case IX86_BUILTIN_MOVNTQ:
15785 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15787 case IX86_BUILTIN_LDMXCSR:
15788 op0 = expand_normal (TREE_VALUE (arglist));
15789 target = assign_386_stack_local (SImode, SLOT_TEMP);
15790 emit_move_insn (target, op0);
15791 emit_insn (gen_sse_ldmxcsr (target));
15792 return 0;
15794 case IX86_BUILTIN_STMXCSR:
15795 target = assign_386_stack_local (SImode, SLOT_TEMP);
15796 emit_insn (gen_sse_stmxcsr (target));
15797 return copy_to_mode_reg (SImode, target);
15799 case IX86_BUILTIN_SHUFPS:
15800 case IX86_BUILTIN_SHUFPD:
15801 icode = (fcode == IX86_BUILTIN_SHUFPS
15802 ? CODE_FOR_sse_shufps
15803 : CODE_FOR_sse2_shufpd);
15804 arg0 = TREE_VALUE (arglist);
15805 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15806 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15807 op0 = expand_normal (arg0);
15808 op1 = expand_normal (arg1);
15809 op2 = expand_normal (arg2);
15810 tmode = insn_data[icode].operand[0].mode;
15811 mode0 = insn_data[icode].operand[1].mode;
15812 mode1 = insn_data[icode].operand[2].mode;
15813 mode2 = insn_data[icode].operand[3].mode;
15815 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15816 op0 = copy_to_mode_reg (mode0, op0);
15817 if ((optimize && !register_operand (op1, mode1))
15818 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15819 op1 = copy_to_mode_reg (mode1, op1);
15820 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15822 /* @@@ better error message */
15823 error ("mask must be an immediate");
15824 return gen_reg_rtx (tmode);
15826 if (optimize || target == 0
15827 || GET_MODE (target) != tmode
15828 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15829 target = gen_reg_rtx (tmode);
15830 pat = GEN_FCN (icode) (target, op0, op1, op2);
15831 if (! pat)
15832 return 0;
15833 emit_insn (pat);
15834 return target;
15836 case IX86_BUILTIN_PSHUFW:
15837 case IX86_BUILTIN_PSHUFD:
15838 case IX86_BUILTIN_PSHUFHW:
15839 case IX86_BUILTIN_PSHUFLW:
15840 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15841 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15842 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15843 : CODE_FOR_mmx_pshufw);
15844 arg0 = TREE_VALUE (arglist);
15845 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15846 op0 = expand_normal (arg0);
15847 op1 = expand_normal (arg1);
15848 tmode = insn_data[icode].operand[0].mode;
15849 mode1 = insn_data[icode].operand[1].mode;
15850 mode2 = insn_data[icode].operand[2].mode;
15852 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15853 op0 = copy_to_mode_reg (mode1, op0);
15854 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15856 /* @@@ better error message */
15857 error ("mask must be an immediate");
15858 return const0_rtx;
15860 if (target == 0
15861 || GET_MODE (target) != tmode
15862 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15863 target = gen_reg_rtx (tmode);
15864 pat = GEN_FCN (icode) (target, op0, op1);
15865 if (! pat)
15866 return 0;
15867 emit_insn (pat);
15868 return target;
15870 case IX86_BUILTIN_PSLLDQI128:
15871 case IX86_BUILTIN_PSRLDQI128:
15872 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
15873 : CODE_FOR_sse2_lshrti3);
15874 arg0 = TREE_VALUE (arglist);
15875 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15876 op0 = expand_normal (arg0);
15877 op1 = expand_normal (arg1);
15878 tmode = insn_data[icode].operand[0].mode;
15879 mode1 = insn_data[icode].operand[1].mode;
15880 mode2 = insn_data[icode].operand[2].mode;
15882 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15884 op0 = copy_to_reg (op0);
15885 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
15887 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15889 error ("shift must be an immediate");
15890 return const0_rtx;
15892 target = gen_reg_rtx (V2DImode);
15893 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
15894 if (! pat)
15895 return 0;
15896 emit_insn (pat);
15897 return target;
15899 case IX86_BUILTIN_FEMMS:
15900 emit_insn (gen_mmx_femms ());
15901 return NULL_RTX;
15903 case IX86_BUILTIN_PAVGUSB:
15904 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
15906 case IX86_BUILTIN_PF2ID:
15907 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
15909 case IX86_BUILTIN_PFACC:
15910 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
15912 case IX86_BUILTIN_PFADD:
15913 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
15915 case IX86_BUILTIN_PFCMPEQ:
15916 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
15918 case IX86_BUILTIN_PFCMPGE:
15919 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
15921 case IX86_BUILTIN_PFCMPGT:
15922 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
15924 case IX86_BUILTIN_PFMAX:
15925 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
15927 case IX86_BUILTIN_PFMIN:
15928 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
15930 case IX86_BUILTIN_PFMUL:
15931 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
15933 case IX86_BUILTIN_PFRCP:
15934 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
15936 case IX86_BUILTIN_PFRCPIT1:
15937 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
15939 case IX86_BUILTIN_PFRCPIT2:
15940 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
15942 case IX86_BUILTIN_PFRSQIT1:
15943 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
15945 case IX86_BUILTIN_PFRSQRT:
15946 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
15948 case IX86_BUILTIN_PFSUB:
15949 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
15951 case IX86_BUILTIN_PFSUBR:
15952 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
15954 case IX86_BUILTIN_PI2FD:
15955 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
15957 case IX86_BUILTIN_PMULHRW:
15958 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
15960 case IX86_BUILTIN_PF2IW:
15961 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
15963 case IX86_BUILTIN_PFNACC:
15964 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
15966 case IX86_BUILTIN_PFPNACC:
15967 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
15969 case IX86_BUILTIN_PI2FW:
15970 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
15972 case IX86_BUILTIN_PSWAPDSI:
15973 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
15975 case IX86_BUILTIN_PSWAPDSF:
15976 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
15978 case IX86_BUILTIN_SQRTSD:
15979 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
15980 case IX86_BUILTIN_LOADUPD:
15981 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
15982 case IX86_BUILTIN_STOREUPD:
15983 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
15985 case IX86_BUILTIN_MFENCE:
15986 emit_insn (gen_sse2_mfence ());
15987 return 0;
15988 case IX86_BUILTIN_LFENCE:
15989 emit_insn (gen_sse2_lfence ());
15990 return 0;
15992 case IX86_BUILTIN_CLFLUSH:
15993 arg0 = TREE_VALUE (arglist);
15994 op0 = expand_normal (arg0);
15995 icode = CODE_FOR_sse2_clflush;
15996 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15997 op0 = copy_to_mode_reg (Pmode, op0);
15999 emit_insn (gen_sse2_clflush (op0));
16000 return 0;
16002 case IX86_BUILTIN_MOVNTPD:
16003 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16004 case IX86_BUILTIN_MOVNTDQ:
16005 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16006 case IX86_BUILTIN_MOVNTI:
16007 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16009 case IX86_BUILTIN_LOADDQU:
16010 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16011 case IX86_BUILTIN_STOREDQU:
16012 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16014 case IX86_BUILTIN_MONITOR:
16015 arg0 = TREE_VALUE (arglist);
16016 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16017 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16018 op0 = expand_normal (arg0);
16019 op1 = expand_normal (arg1);
16020 op2 = expand_normal (arg2);
16021 if (!REG_P (op0))
16022 op0 = copy_to_mode_reg (SImode, op0);
16023 if (!REG_P (op1))
16024 op1 = copy_to_mode_reg (SImode, op1);
16025 if (!REG_P (op2))
16026 op2 = copy_to_mode_reg (SImode, op2);
16027 emit_insn (gen_sse3_monitor (op0, op1, op2));
16028 return 0;
16030 case IX86_BUILTIN_MWAIT:
16031 arg0 = TREE_VALUE (arglist);
16032 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16033 op0 = expand_normal (arg0);
16034 op1 = expand_normal (arg1);
16035 if (!REG_P (op0))
16036 op0 = copy_to_mode_reg (SImode, op0);
16037 if (!REG_P (op1))
16038 op1 = copy_to_mode_reg (SImode, op1);
16039 emit_insn (gen_sse3_mwait (op0, op1));
16040 return 0;
16042 case IX86_BUILTIN_LDDQU:
16043 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16044 target, 1);
16046 case IX86_BUILTIN_VEC_INIT_V2SI:
16047 case IX86_BUILTIN_VEC_INIT_V4HI:
16048 case IX86_BUILTIN_VEC_INIT_V8QI:
16049 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16051 case IX86_BUILTIN_VEC_EXT_V2DF:
16052 case IX86_BUILTIN_VEC_EXT_V2DI:
16053 case IX86_BUILTIN_VEC_EXT_V4SF:
16054 case IX86_BUILTIN_VEC_EXT_V4SI:
16055 case IX86_BUILTIN_VEC_EXT_V8HI:
16056 case IX86_BUILTIN_VEC_EXT_V2SI:
16057 case IX86_BUILTIN_VEC_EXT_V4HI:
16058 return ix86_expand_vec_ext_builtin (arglist, target);
16060 case IX86_BUILTIN_VEC_SET_V8HI:
16061 case IX86_BUILTIN_VEC_SET_V4HI:
16062 return ix86_expand_vec_set_builtin (arglist);
16064 default:
16065 break;
16068 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16069 if (d->code == fcode)
16071 /* Compares are treated specially. */
16072 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16073 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16074 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16075 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16076 return ix86_expand_sse_compare (d, arglist, target);
16078 return ix86_expand_binop_builtin (d->icode, arglist, target);
16081 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16082 if (d->code == fcode)
16083 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16085 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16086 if (d->code == fcode)
16087 return ix86_expand_sse_comi (d, arglist, target);
16089 gcc_unreachable ();
16092 /* Store OPERAND to memory after reload is completed. This means
16093 that we can't easily use assign_stack_local. */
16094 rtx
16095 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16097 rtx result;
16099 gcc_assert (reload_completed);
16100 if (TARGET_RED_ZONE)
16102 result = gen_rtx_MEM (mode,
16103 gen_rtx_PLUS (Pmode,
16104 stack_pointer_rtx,
16105 GEN_INT (-RED_ZONE_SIZE)));
16106 emit_move_insn (result, operand);
16108 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16110 switch (mode)
16112 case HImode:
16113 case SImode:
16114 operand = gen_lowpart (DImode, operand);
16115 /* FALLTHRU */
16116 case DImode:
16117 emit_insn (
16118 gen_rtx_SET (VOIDmode,
16119 gen_rtx_MEM (DImode,
16120 gen_rtx_PRE_DEC (DImode,
16121 stack_pointer_rtx)),
16122 operand));
16123 break;
16124 default:
16125 gcc_unreachable ();
16127 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16129 else
16131 switch (mode)
16133 case DImode:
16135 rtx operands[2];
16136 split_di (&operand, 1, operands, operands + 1);
16137 emit_insn (
16138 gen_rtx_SET (VOIDmode,
16139 gen_rtx_MEM (SImode,
16140 gen_rtx_PRE_DEC (Pmode,
16141 stack_pointer_rtx)),
16142 operands[1]));
16143 emit_insn (
16144 gen_rtx_SET (VOIDmode,
16145 gen_rtx_MEM (SImode,
16146 gen_rtx_PRE_DEC (Pmode,
16147 stack_pointer_rtx)),
16148 operands[0]));
16150 break;
16151 case HImode:
16152 /* Store HImodes as SImodes. */
16153 operand = gen_lowpart (SImode, operand);
16154 /* FALLTHRU */
16155 case SImode:
16156 emit_insn (
16157 gen_rtx_SET (VOIDmode,
16158 gen_rtx_MEM (GET_MODE (operand),
16159 gen_rtx_PRE_DEC (SImode,
16160 stack_pointer_rtx)),
16161 operand));
16162 break;
16163 default:
16164 gcc_unreachable ();
16166 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16168 return result;
16171 /* Free operand from the memory. */
16172 void
16173 ix86_free_from_memory (enum machine_mode mode)
16175 if (!TARGET_RED_ZONE)
16177 int size;
16179 if (mode == DImode || TARGET_64BIT)
16180 size = 8;
16181 else
16182 size = 4;
16183 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16184 to a pop or add instruction if registers are available. */
16185 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16186 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16187 GEN_INT (size))));
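/* Sketch of the RTL emitted above (illustrative only): for a 4-byte slot on
   a 32-bit target this is

       (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 4)))

   which peephole2 may later rewrite as a pop into a scratch register or a
   plain add, whichever is available and cheaper at that point.  */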
16191 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16192 QImode must go into class Q_REGS.
16193 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and
16194 movdf do mem-to-mem moves through integer regs. */
16195 enum reg_class
16196 ix86_preferred_reload_class (rtx x, enum reg_class class)
16198 /* We're only allowed to return a subclass of CLASS. Many of the
16199 following checks fail for NO_REGS, so eliminate that early. */
16200 if (class == NO_REGS)
16201 return NO_REGS;
16203 /* All classes can load zeros. */
16204 if (x == CONST0_RTX (GET_MODE (x)))
16205 return class;
16207 /* Floating-point constants need more complex checks. */
16208 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16210 /* General regs can load everything. */
16211 if (reg_class_subset_p (class, GENERAL_REGS))
16212 return class;
16214 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16215 zero above. We only want to wind up preferring 80387 registers if
16216 we plan on doing computation with them. */
16217 if (TARGET_80387
16218 && (TARGET_MIX_SSE_I387
16219 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
16220 && standard_80387_constant_p (x))
16222 /* Limit class to non-sse. */
16223 if (class == FLOAT_SSE_REGS)
16224 return FLOAT_REGS;
16225 if (class == FP_TOP_SSE_REGS)
16226 return FP_TOP_REG;
16227 if (class == FP_SECOND_SSE_REGS)
16228 return FP_SECOND_REG;
16229 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16230 return class;
16233 return NO_REGS;
16235 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
16236 return NO_REGS;
16237 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
16238 return NO_REGS;
16240 /* Generally when we see PLUS here, it's the function invariant
16241 (plus soft-fp const_int), which can only be computed into general
16242 regs. */
16243 if (GET_CODE (x) == PLUS)
16244 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16246 /* QImode constants are easy to load, but non-constant QImode data
16247 must go into Q_REGS. */
16248 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16250 if (reg_class_subset_p (class, Q_REGS))
16251 return class;
16252 if (reg_class_subset_p (Q_REGS, class))
16253 return Q_REGS;
16254 return NO_REGS;
16257 return class;
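/* Hedged example of the QImode rule above: reloading a non-constant QImode
   pseudo into, say, GENERAL_REGS is narrowed to Q_REGS so the byte can land
   in %al/%bl/%cl/%dl -- the registers with addressable low bytes in 32-bit
   code; a class that neither contains nor is contained in Q_REGS gets
   NO_REGS and is reloaded some other way.  */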
16260 /* If we are copying between general and FP registers, we need a memory
16261 location. The same is true for SSE and MMX registers.
16263 The macro can't work reliably when one of the CLASSES is class containing
16264 registers from multiple units (SSE, MMX, integer). We avoid this by never
16265 combining those units in a single alternative in the machine description.
16266 Ensure that this constraint holds to avoid unexpected surprises.
16268 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16269 enforce these sanity checks. */
16272 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16273 enum machine_mode mode, int strict)
16275 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16276 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16277 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16278 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16279 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16280 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16282 gcc_assert (!strict);
16283 return true;
16286 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16287 return true;
16289 /* ??? This is a lie. We do have moves between mmx/general, and between
16290 mmx/sse2. But by saying we need secondary memory we discourage the
16291 register allocator from using the mmx registers unless needed. */
16292 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16293 return true;
16295 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16297 /* SSE1 doesn't have any direct moves from other classes. */
16298 if (!TARGET_SSE2)
16299 return true;
16301 /* If the target says that inter-unit moves are more expensive
16302 than moving through memory, then don't generate them. */
16303 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16304 return true;
16306 /* Between SSE and general, we have moves no larger than word size. */
16307 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16308 return true;
16310 /* ??? For the cost of one register reformat penalty, we could use
16311 the same instructions to move SFmode and DFmode data, but the
16312 relevant move patterns don't support those alternatives. */
16313 if (mode == SFmode || mode == DFmode)
16314 return true;
16317 return false;
16320 /* Return true if the registers in CLASS cannot represent the change from
16321 modes FROM to TO. */
16323 bool
16324 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16325 enum reg_class class)
16327 if (from == to)
16328 return false;
16330 /* x87 registers can't do subreg at all, as all values are reformatted
16331 to extended precision. */
16332 if (MAYBE_FLOAT_CLASS_P (class))
16333 return true;
16335 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16337 /* Vector registers do not support QI or HImode loads. If we don't
16338 disallow a change to these modes, reload will assume it's ok to
16339 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16340 the vec_dupv4hi pattern. */
16341 if (GET_MODE_SIZE (from) < 4)
16342 return true;
16344 /* Vector registers do not support subreg with nonzero offsets, which
16345 are otherwise valid for integer registers. Since we can't see
16346 whether we have a nonzero offset from here, prohibit all
16347 nonparadoxical subregs changing size. */
16348 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16349 return true;
16352 return false;
16355 /* Return the cost of moving data from a register in class CLASS1 to
16356 one in class CLASS2.
16358 It is not required that the cost always equal 2 when FROM is the same as TO;
16359 on some machines it is expensive to move between registers if they are not
16360 general registers. */
16363 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16364 enum reg_class class2)
16366 /* In case we require secondary memory, compute cost of the store followed
16367 by load. In order to avoid bad register allocation choices, we need this
16368 to be *at least* as high as the symmetric MEMORY_MOVE_COST.
16370 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16372 int cost = 1;
16374 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16375 MEMORY_MOVE_COST (mode, class1, 1));
16376 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16377 MEMORY_MOVE_COST (mode, class2, 1));
16379 /* In case of copying from a general purpose register we may emit multiple
16380 stores followed by a single load, causing a memory size mismatch stall.
16381 Count this as an arbitrarily high cost of 20. */
16382 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16383 cost += 20;
16385 /* In the case of FP/MMX moves, the registers actually overlap, and we
16386 have to switch modes in order to treat them differently. */
16387 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16388 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16389 cost += 20;
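/* The final estimate is therefore 1 plus the worse of load/store cost
   for each class, plus 20 for each of the penalties above when they
   apply; e.g. with symmetric memory move costs of 4 on both sides and
   no penalty, the result is 1 + 4 + 4 = 9.  */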
16391 return cost;
16394 /* Moves between SSE/MMX and integer unit are expensive. */
16395 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16396 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16397 return ix86_cost->mmxsse_to_integer;
16398 if (MAYBE_FLOAT_CLASS_P (class1))
16399 return ix86_cost->fp_move;
16400 if (MAYBE_SSE_CLASS_P (class1))
16401 return ix86_cost->sse_move;
16402 if (MAYBE_MMX_CLASS_P (class1))
16403 return ix86_cost->mmx_move;
16404 return 2;
16407 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16409 bool
16410 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16412 /* The flags register, and only the flags register, can hold CCmode values. */
16413 if (CC_REGNO_P (regno))
16414 return GET_MODE_CLASS (mode) == MODE_CC;
16415 if (GET_MODE_CLASS (mode) == MODE_CC
16416 || GET_MODE_CLASS (mode) == MODE_RANDOM
16417 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16418 return 0;
16419 if (FP_REGNO_P (regno))
16420 return VALID_FP_MODE_P (mode);
16421 if (SSE_REGNO_P (regno))
16423 /* We implement the move patterns for all vector modes into and
16424 out of SSE registers, even when no operation instructions
16425 are available. */
16426 return (VALID_SSE_REG_MODE (mode)
16427 || VALID_SSE2_REG_MODE (mode)
16428 || VALID_MMX_REG_MODE (mode)
16429 || VALID_MMX_REG_MODE_3DNOW (mode));
16431 if (MMX_REGNO_P (regno))
16433 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16434 so if the register is available at all, then we can move data of
16435 the given mode into or out of it. */
16436 return (VALID_MMX_REG_MODE (mode)
16437 || VALID_MMX_REG_MODE_3DNOW (mode));
16440 if (mode == QImode)
16442 /* Take care with QImode values: they can be in non-QI regs,
16443 but then they do cause partial register stalls. */
16444 if (regno < 4 || TARGET_64BIT)
16445 return 1;
16446 if (!TARGET_PARTIAL_REG_STALL)
16447 return 1;
16448 return reload_in_progress || reload_completed;
16450 /* We handle both integers and floats in the general purpose registers. */
16451 else if (VALID_INT_MODE_P (mode))
16452 return 1;
16453 else if (VALID_FP_MODE_P (mode))
16454 return 1;
16455 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16456 on to use that value in smaller contexts, this can easily force a
16457 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16458 supporting DImode, allow it. */
16459 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16460 return 1;
16462 return 0;
16465 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16466 tieable integer mode. */
16468 static bool
16469 ix86_tieable_integer_mode_p (enum machine_mode mode)
16471 switch (mode)
16473 case HImode:
16474 case SImode:
16475 return true;
16477 case QImode:
16478 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16480 case DImode:
16481 return TARGET_64BIT;
16483 default:
16484 return false;
16488 /* Return true if MODE1 is accessible in a register that can hold MODE2
16489 without copying. That is, all register classes that can hold MODE2
16490 can also hold MODE1. */
16492 bool
16493 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16495 if (mode1 == mode2)
16496 return true;
16498 if (ix86_tieable_integer_mode_p (mode1)
16499 && ix86_tieable_integer_mode_p (mode2))
16500 return true;
16502 /* MODE2 being XFmode implies fp stack or general regs, which means we
16503 can tie any smaller floating point modes to it. Note that we do not
16504 tie this with TFmode. */
16505 if (mode2 == XFmode)
16506 return mode1 == SFmode || mode1 == DFmode;
16508 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16509 that we can tie it with SFmode. */
16510 if (mode2 == DFmode)
16511 return mode1 == SFmode;
16513 /* If MODE2 is only appropriate for an SSE register, then tie with
16514 any other mode acceptable to SSE registers. */
16515 if (GET_MODE_SIZE (mode2) >= 8
16516 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16517 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16519 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16520 with any other mode acceptable to MMX registers. */
16521 if (GET_MODE_SIZE (mode2) == 8
16522 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16523 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16525 return false;
16528 /* Return the cost of moving data of mode M between a
16529 register and memory. A value of 2 is the default; this cost is
16530 relative to those in `REGISTER_MOVE_COST'.
16532 If moving between registers and memory is more expensive than
16533 between two registers, you should define this macro to express the
16534 relative cost.
16536 Also model the increased cost of moving QImode registers in
16537 non-Q_REGS classes.
16540 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16542 if (FLOAT_CLASS_P (class))
16544 int index;
16545 switch (mode)
16547 case SFmode:
16548 index = 0;
16549 break;
16550 case DFmode:
16551 index = 1;
16552 break;
16553 case XFmode:
16554 index = 2;
16555 break;
16556 default:
16557 return 100;
16559 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16561 if (SSE_CLASS_P (class))
16563 int index;
16564 switch (GET_MODE_SIZE (mode))
16566 case 4:
16567 index = 0;
16568 break;
16569 case 8:
16570 index = 1;
16571 break;
16572 case 16:
16573 index = 2;
16574 break;
16575 default:
16576 return 100;
16578 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16580 if (MMX_CLASS_P (class))
16582 int index;
16583 switch (GET_MODE_SIZE (mode))
16585 case 4:
16586 index = 0;
16587 break;
16588 case 8:
16589 index = 1;
16590 break;
16591 default:
16592 return 100;
16594 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16596 switch (GET_MODE_SIZE (mode))
16598 case 1:
16599 if (in)
16600 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16601 : ix86_cost->movzbl_load);
16602 else
16603 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16604 : ix86_cost->int_store[0] + 4);
16605 break;
16606 case 2:
16607 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16608 default:
16609 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
16610 if (mode == TFmode)
16611 mode = XFmode;
16612 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16613 * (((int) GET_MODE_SIZE (mode)
16614 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
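/* E.g. on ia32 (UNITS_PER_WORD == 4) an XFmode value of 12 bytes costs
   (12 + 3) / 4 = 3 word-sized loads or stores.  */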
16618 /* Compute a (partial) cost for rtx X. Return true if the complete
16619 cost has been computed, and false if subexpressions should be
16620 scanned. In either case, *TOTAL contains the cost result. */
16622 static bool
16623 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16625 enum machine_mode mode = GET_MODE (x);
16627 switch (code)
16629 case CONST_INT:
16630 case CONST:
16631 case LABEL_REF:
16632 case SYMBOL_REF:
16633 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16634 *total = 3;
16635 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16636 *total = 2;
16637 else if (flag_pic && SYMBOLIC_CONST (x)
16638 && (!TARGET_64BIT
16639 || (GET_CODE (x) != LABEL_REF
16640 && (GET_CODE (x) != SYMBOL_REF
16641 || !SYMBOL_REF_LOCAL_P (x)))))
16642 *total = 1;
16643 else
16644 *total = 0;
16645 return true;
16647 case CONST_DOUBLE:
16648 if (mode == VOIDmode)
16649 *total = 0;
16650 else
16651 switch (standard_80387_constant_p (x))
16653 case 1: /* 0.0 */
16654 *total = 1;
16655 break;
16656 default: /* Other constants */
16657 *total = 2;
16658 break;
16659 case 0:
16660 case -1:
16661 /* Start with (MEM (SYMBOL_REF)), since that's where
16662 it'll probably end up. Add a penalty for size. */
16663 *total = (COSTS_N_INSNS (1)
16664 + (flag_pic != 0 && !TARGET_64BIT)
16665 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16666 break;
16668 return true;
16670 case ZERO_EXTEND:
16671 /* The zero extension is often completely free on x86_64, so make
16672 it as cheap as possible. */
16673 if (TARGET_64BIT && mode == DImode
16674 && GET_MODE (XEXP (x, 0)) == SImode)
16675 *total = 1;
16676 else if (TARGET_ZERO_EXTEND_WITH_AND)
16677 *total = ix86_cost->add;
16678 else
16679 *total = ix86_cost->movzx;
16680 return false;
16682 case SIGN_EXTEND:
16683 *total = ix86_cost->movsx;
16684 return false;
16686 case ASHIFT:
16687 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16688 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16690 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16691 if (value == 1)
16693 *total = ix86_cost->add;
16694 return false;
16696 if ((value == 2 || value == 3)
16697 && ix86_cost->lea <= ix86_cost->shift_const)
16699 *total = ix86_cost->lea;
16700 return false;
16703 /* FALLTHRU */
16705 case ROTATE:
16706 case ASHIFTRT:
16707 case LSHIFTRT:
16708 case ROTATERT:
16709 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16711 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16713 if (INTVAL (XEXP (x, 1)) > 32)
16714 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
16715 else
16716 *total = ix86_cost->shift_const * 2;
16718 else
16720 if (GET_CODE (XEXP (x, 1)) == AND)
16721 *total = ix86_cost->shift_var * 2;
16722 else
16723 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
16726 else
16728 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16729 *total = ix86_cost->shift_const;
16730 else
16731 *total = ix86_cost->shift_var;
16733 return false;
16735 case MULT:
16736 if (FLOAT_MODE_P (mode))
16738 *total = ix86_cost->fmul;
16739 return false;
16741 else
16743 rtx op0 = XEXP (x, 0);
16744 rtx op1 = XEXP (x, 1);
16745 int nbits;
16746 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16748 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16749 for (nbits = 0; value != 0; value &= value - 1)
16750 nbits++;
16752 else
16753 /* This is arbitrary. */
16754 nbits = 7;
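/* NBITS is now the number of set bits in the multiplier; the loop above
   clears the lowest set bit on each iteration, so e.g. a constant of 10
   (binary 1010) gives nbits == 2.  */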
16756 /* Compute costs correctly for widening multiplication. */
16757 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16758 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16759 == GET_MODE_SIZE (mode))
16761 int is_mulwiden = 0;
16762 enum machine_mode inner_mode = GET_MODE (op0);
16764 if (GET_CODE (op0) == GET_CODE (op1))
16765 is_mulwiden = 1, op1 = XEXP (op1, 0);
16766 else if (GET_CODE (op1) == CONST_INT)
16768 if (GET_CODE (op0) == SIGN_EXTEND)
16769 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16770 == INTVAL (op1);
16771 else
16772 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16775 if (is_mulwiden)
16776 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16779 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16780 + nbits * ix86_cost->mult_bit
16781 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
16783 return true;
16786 case DIV:
16787 case UDIV:
16788 case MOD:
16789 case UMOD:
16790 if (FLOAT_MODE_P (mode))
16791 *total = ix86_cost->fdiv;
16792 else
16793 *total = ix86_cost->divide[MODE_INDEX (mode)];
16794 return false;
16796 case PLUS:
16797 if (FLOAT_MODE_P (mode))
16798 *total = ix86_cost->fadd;
16799 else if (GET_MODE_CLASS (mode) == MODE_INT
16800 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16802 if (GET_CODE (XEXP (x, 0)) == PLUS
16803 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16804 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16805 && CONSTANT_P (XEXP (x, 1)))
16807 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16808 if (val == 2 || val == 4 || val == 8)
16810 *total = ix86_cost->lea;
16811 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16812 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16813 outer_code);
16814 *total += rtx_cost (XEXP (x, 1), outer_code);
16815 return true;
16818 else if (GET_CODE (XEXP (x, 0)) == MULT
16819 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16821 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16822 if (val == 2 || val == 4 || val == 8)
16824 *total = ix86_cost->lea;
16825 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16826 *total += rtx_cost (XEXP (x, 1), outer_code);
16827 return true;
16830 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16832 *total = ix86_cost->lea;
16833 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16834 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16835 *total += rtx_cost (XEXP (x, 1), outer_code);
16836 return true;
16839 /* FALLTHRU */
16841 case MINUS:
16842 if (FLOAT_MODE_P (mode))
16844 *total = ix86_cost->fadd;
16845 return false;
16847 /* FALLTHRU */
16849 case AND:
16850 case IOR:
16851 case XOR:
16852 if (!TARGET_64BIT && mode == DImode)
16854 *total = (ix86_cost->add * 2
16855 + (rtx_cost (XEXP (x, 0), outer_code)
16856 << (GET_MODE (XEXP (x, 0)) != DImode))
16857 + (rtx_cost (XEXP (x, 1), outer_code)
16858 << (GET_MODE (XEXP (x, 1)) != DImode)));
16859 return true;
16861 /* FALLTHRU */
16863 case NEG:
16864 if (FLOAT_MODE_P (mode))
16866 *total = ix86_cost->fchs;
16867 return false;
16869 /* FALLTHRU */
16871 case NOT:
16872 if (!TARGET_64BIT && mode == DImode)
16873 *total = ix86_cost->add * 2;
16874 else
16875 *total = ix86_cost->add;
16876 return false;
16878 case COMPARE:
16879 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
16880 && XEXP (XEXP (x, 0), 1) == const1_rtx
16881 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
16882 && XEXP (x, 1) == const0_rtx)
16884 /* This kind of construct is implemented using test[bwl].
16885 Treat it as if we had an AND. */
16886 *total = (ix86_cost->add
16887 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
16888 + rtx_cost (const1_rtx, outer_code));
16889 return true;
16891 return false;
16893 case FLOAT_EXTEND:
16894 if (!TARGET_SSE_MATH
16895 || mode == XFmode
16896 || (mode == DFmode && !TARGET_SSE2))
16897 *total = 0;
16898 return false;
16900 case ABS:
16901 if (FLOAT_MODE_P (mode))
16902 *total = ix86_cost->fabs;
16903 return false;
16905 case SQRT:
16906 if (FLOAT_MODE_P (mode))
16907 *total = ix86_cost->fsqrt;
16908 return false;
16910 case UNSPEC:
16911 if (XINT (x, 1) == UNSPEC_TP)
16912 *total = 0;
16913 return false;
16915 default:
16916 return false;
16920 #if TARGET_MACHO
16922 static int current_machopic_label_num;
16924 /* Given a symbol name and its associated stub, write out the
16925 definition of the stub. */
16927 void
16928 machopic_output_stub (FILE *file, const char *symb, const char *stub)
16930 unsigned int length;
16931 char *binder_name, *symbol_name, lazy_ptr_name[32];
16932 int label = ++current_machopic_label_num;
16934 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
16935 symb = (*targetm.strip_name_encoding) (symb);
16937 length = strlen (stub);
16938 binder_name = alloca (length + 32);
16939 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
16941 length = strlen (symb);
16942 symbol_name = alloca (length + 32);
16943 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
16945 sprintf (lazy_ptr_name, "L%d$lz", label);
16947 if (MACHOPIC_PURE)
16948 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
16949 else
16950 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
16952 fprintf (file, "%s:\n", stub);
16953 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16955 if (MACHOPIC_PURE)
16957 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
16958 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
16959 fprintf (file, "\tjmp %%edx\n");
16961 else
16962 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
16964 fprintf (file, "%s:\n", binder_name);
16966 if (MACHOPIC_PURE)
16968 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
16969 fprintf (file, "\tpushl %%eax\n");
16971 else
16972 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
16974 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
16976 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
16977 fprintf (file, "%s:\n", lazy_ptr_name);
16978 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16979 fprintf (file, "\t.long %s\n", binder_name);
16982 void
16983 darwin_x86_file_end (void)
16985 darwin_file_end ();
16986 ix86_file_end ();
16988 #endif /* TARGET_MACHO */
16990 /* Order the registers for the register allocator. */
16992 void
16993 x86_order_regs_for_local_alloc (void)
16995 int pos = 0;
16996 int i;
16998 /* First allocate the local general purpose registers. */
16999 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17000 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17001 reg_alloc_order [pos++] = i;
17003 /* Global general purpose registers. */
17004 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17005 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17006 reg_alloc_order [pos++] = i;
17008 /* x87 registers come first in case we are doing FP math
17009 using them. */
17010 if (!TARGET_SSE_MATH)
17011 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17012 reg_alloc_order [pos++] = i;
17014 /* SSE registers. */
17015 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17016 reg_alloc_order [pos++] = i;
17017 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17018 reg_alloc_order [pos++] = i;
17020 /* x87 registers. */
17021 if (TARGET_SSE_MATH)
17022 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17023 reg_alloc_order [pos++] = i;
17025 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17026 reg_alloc_order [pos++] = i;
17028 /* Initialize the rest of the array, as we do not allocate some registers
17029 at all. */
17030 while (pos < FIRST_PSEUDO_REGISTER)
17031 reg_alloc_order [pos++] = 0;
17034 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17035 struct attribute_spec.handler. */
17036 static tree
17037 ix86_handle_struct_attribute (tree *node, tree name,
17038 tree args ATTRIBUTE_UNUSED,
17039 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17041 tree *type = NULL;
17042 if (DECL_P (*node))
17044 if (TREE_CODE (*node) == TYPE_DECL)
17045 type = &TREE_TYPE (*node);
17047 else
17048 type = node;
17050 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17051 || TREE_CODE (*type) == UNION_TYPE)))
17053 warning (OPT_Wattributes, "%qs attribute ignored",
17054 IDENTIFIER_POINTER (name));
17055 *no_add_attrs = true;
17058 else if ((is_attribute_p ("ms_struct", name)
17059 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17060 || ((is_attribute_p ("gcc_struct", name)
17061 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17063 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17064 IDENTIFIER_POINTER (name));
17065 *no_add_attrs = true;
17068 return NULL_TREE;
17071 static bool
17072 ix86_ms_bitfield_layout_p (tree record_type)
17074 return (TARGET_MS_BITFIELD_LAYOUT &&
17075 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17076 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17079 /* Returns an expression indicating where the this parameter is
17080 located on entry to the FUNCTION. */
17082 static rtx
17083 x86_this_parameter (tree function)
17085 tree type = TREE_TYPE (function);
17087 if (TARGET_64BIT)
17089 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17090 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17093 if (ix86_function_regparm (type, function) > 0)
17095 tree parm;
17097 parm = TYPE_ARG_TYPES (type);
17098 /* Figure out whether or not the function has a variable number of
17099 arguments. */
17100 for (; parm; parm = TREE_CHAIN (parm))
17101 if (TREE_VALUE (parm) == void_type_node)
17102 break;
17103 /* If not, the this parameter is in the first argument. */
17104 if (parm)
17106 int regno = 0;
17107 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17108 regno = 2;
17109 return gen_rtx_REG (SImode, regno);
17113 if (aggregate_value_p (TREE_TYPE (type), type))
17114 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17115 else
17116 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
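/* Offset 4 skips the saved return address; when the function returns an
   aggregate in memory, the hidden return-value pointer occupies the next
   slot, so the this pointer lives at offset 8.  */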
17119 /* Determine whether x86_output_mi_thunk can succeed. */
17121 static bool
17122 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17123 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17124 HOST_WIDE_INT vcall_offset, tree function)
17126 /* 64-bit can handle anything. */
17127 if (TARGET_64BIT)
17128 return true;
17130 /* For 32-bit, everything's fine if we have one free register. */
17131 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17132 return true;
17134 /* Need a free register for vcall_offset. */
17135 if (vcall_offset)
17136 return false;
17138 /* Need a free register for GOT references. */
17139 if (flag_pic && !(*targetm.binds_local_p) (function))
17140 return false;
17142 /* Otherwise ok. */
17143 return true;
17146 /* Output the assembler code for a thunk function. THUNK_DECL is the
17147 declaration for the thunk function itself, FUNCTION is the decl for
17148 the target function. DELTA is an immediate constant offset to be
17149 added to THIS. If VCALL_OFFSET is nonzero, the word at
17150 *(*this + vcall_offset) should be added to THIS. */
17152 static void
17153 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17154 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17155 HOST_WIDE_INT vcall_offset, tree function)
17157 rtx xops[3];
17158 rtx this = x86_this_parameter (function);
17159 rtx this_reg, tmp;
17161 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17162 pull it in now and let DELTA benefit. */
17163 if (REG_P (this))
17164 this_reg = this;
17165 else if (vcall_offset)
17167 /* Put the this parameter into %eax. */
17168 xops[0] = this;
17169 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17170 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17172 else
17173 this_reg = NULL_RTX;
17175 /* Adjust the this parameter by a fixed constant. */
17176 if (delta)
17178 xops[0] = GEN_INT (delta);
17179 xops[1] = this_reg ? this_reg : this;
17180 if (TARGET_64BIT)
17182 if (!x86_64_general_operand (xops[0], DImode))
17184 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17185 xops[1] = tmp;
17186 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17187 xops[0] = tmp;
17188 xops[1] = this;
17190 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17192 else
17193 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17196 /* Adjust the this parameter by a value stored in the vtable. */
17197 if (vcall_offset)
17199 if (TARGET_64BIT)
17200 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17201 else
17203 int tmp_regno = 2 /* ECX */;
17204 if (lookup_attribute ("fastcall",
17205 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17206 tmp_regno = 0 /* EAX */;
17207 tmp = gen_rtx_REG (SImode, tmp_regno);
17210 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17211 xops[1] = tmp;
17212 if (TARGET_64BIT)
17213 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17214 else
17215 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17217 /* Adjust the this parameter. */
17218 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17219 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17221 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17222 xops[0] = GEN_INT (vcall_offset);
17223 xops[1] = tmp2;
17224 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17225 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17227 xops[1] = this_reg;
17228 if (TARGET_64BIT)
17229 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17230 else
17231 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17234 /* If necessary, drop THIS back to its stack slot. */
17235 if (this_reg && this_reg != this)
17237 xops[0] = this_reg;
17238 xops[1] = this;
17239 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17242 xops[0] = XEXP (DECL_RTL (function), 0);
17243 if (TARGET_64BIT)
17245 if (!flag_pic || (*targetm.binds_local_p) (function))
17246 output_asm_insn ("jmp\t%P0", xops);
17247 else
17249 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17250 tmp = gen_rtx_CONST (Pmode, tmp);
17251 tmp = gen_rtx_MEM (QImode, tmp);
17252 xops[0] = tmp;
17253 output_asm_insn ("jmp\t%A0", xops);
17256 else
17258 if (!flag_pic || (*targetm.binds_local_p) (function))
17259 output_asm_insn ("jmp\t%P0", xops);
17260 else
17261 #if TARGET_MACHO
17262 if (TARGET_MACHO)
17264 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17265 tmp = (gen_rtx_SYMBOL_REF
17266 (Pmode,
17267 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17268 tmp = gen_rtx_MEM (QImode, tmp);
17269 xops[0] = tmp;
17270 output_asm_insn ("jmp\t%0", xops);
17272 else
17273 #endif /* TARGET_MACHO */
17275 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17276 output_set_got (tmp, NULL_RTX);
17278 xops[1] = tmp;
17279 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17280 output_asm_insn ("jmp\t{*}%1", xops);
17285 static void
17286 x86_file_start (void)
17288 default_file_start ();
17289 if (X86_FILE_START_VERSION_DIRECTIVE)
17290 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17291 if (X86_FILE_START_FLTUSED)
17292 fputs ("\t.global\t__fltused\n", asm_out_file);
17293 if (ix86_asm_dialect == ASM_INTEL)
17294 fputs ("\t.intel_syntax\n", asm_out_file);
17298 x86_field_alignment (tree field, int computed)
17300 enum machine_mode mode;
17301 tree type = TREE_TYPE (field);
17303 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17304 return computed;
17305 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17306 ? get_inner_array_type (type) : type);
17307 if (mode == DFmode || mode == DCmode
17308 || GET_MODE_CLASS (mode) == MODE_INT
17309 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17310 return MIN (32, computed);
17311 return computed;
17314 /* Output assembler code to FILE to increment profiler label # LABELNO
17315 for profiling a function entry. */
17316 void
17317 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17319 if (TARGET_64BIT)
17320 if (flag_pic)
17322 #ifndef NO_PROFILE_COUNTERS
17323 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17324 #endif
17325 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17327 else
17329 #ifndef NO_PROFILE_COUNTERS
17330 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17331 #endif
17332 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17334 else if (flag_pic)
17336 #ifndef NO_PROFILE_COUNTERS
17337 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17338 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17339 #endif
17340 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17342 else
17344 #ifndef NO_PROFILE_COUNTERS
17345 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17346 PROFILE_COUNT_REGISTER);
17347 #endif
17348 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17352 /* We don't have exact information about the insn sizes, but we may assume
17353 quite safely that we are informed about all 1 byte insns and memory
17354 address sizes. This is enough to eliminate unnecessary padding in
17355 99% of cases. */
17357 static int
17358 min_insn_size (rtx insn)
17360 int l = 0;
17362 if (!INSN_P (insn) || !active_insn_p (insn))
17363 return 0;
17365 /* Discard alignments we've emitted, and jump instructions. */
17366 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17367 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17368 return 0;
17369 if (GET_CODE (insn) == JUMP_INSN
17370 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17371 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17372 return 0;
17374 /* Important case - calls are always 5 bytes.
17375 It is common to have many calls in a row. */
17376 if (GET_CODE (insn) == CALL_INSN
17377 && symbolic_reference_mentioned_p (PATTERN (insn))
17378 && !SIBLING_CALL_P (insn))
17379 return 5;
17380 if (get_attr_length (insn) <= 1)
17381 return 1;
17383 /* For normal instructions we may rely on the sizes of addresses
17384 and the presence of a symbol to require 4 bytes of encoding. This is
17385 not the case for jumps, where references are PC relative. */
17386 if (GET_CODE (insn) != JUMP_INSN)
17388 l = get_attr_length_address (insn);
17389 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17390 l = 4;
17392 if (l)
17393 return 1+l;
17394 else
17395 return 2;
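/* So a direct call to a known symbol counts as 5 bytes, an insn of
   length 1 as 1 byte, and any other non-jump insn as 1 plus its
   estimated address size (at least 4 when a symbol is referenced),
   or 2 when no address bytes are involved.  */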
17398 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17399 16-byte window. */
17401 static void
17402 ix86_avoid_jump_misspredicts (void)
17404 rtx insn, start = get_insns ();
17405 int nbytes = 0, njumps = 0;
17406 int isjump = 0;
17408 /* Look for all minimal intervals of instructions containing 4 jumps.
17409 The intervals are bounded by START and INSN. NBYTES is the total
17410 size of instructions in the interval including INSN and not including
17411 START. When NBYTES is smaller than 16 bytes, it is possible
17412 that the ends of START and INSN fall in the same 16-byte page.
17414 The smallest offset at which INSN can start in that page is the case where
17415 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
17416 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
17418 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17421 nbytes += min_insn_size (insn);
17422 if (dump_file)
17423 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17424 INSN_UID (insn), min_insn_size (insn));
17425 if ((GET_CODE (insn) == JUMP_INSN
17426 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17427 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17428 || GET_CODE (insn) == CALL_INSN)
17429 njumps++;
17430 else
17431 continue;
17433 while (njumps > 3)
17435 start = NEXT_INSN (start);
17436 if ((GET_CODE (start) == JUMP_INSN
17437 && GET_CODE (PATTERN (start)) != ADDR_VEC
17438 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17439 || GET_CODE (start) == CALL_INSN)
17440 njumps--, isjump = 1;
17441 else
17442 isjump = 0;
17443 nbytes -= min_insn_size (start);
17445 gcc_assert (njumps >= 0);
17446 if (dump_file)
17447 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17448 INSN_UID (start), INSN_UID (insn), nbytes);
17450 if (njumps == 3 && isjump && nbytes < 16)
17452 int padsize = 15 - nbytes + min_insn_size (insn);
17454 if (dump_file)
17455 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17456 INSN_UID (insn), padsize);
17457 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
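/* gen_align expands to a maximum-skip alignment directive (roughly
   .p2align 4,,PADSIZE), so the assembler pads only when at most PADSIZE
   bytes suffice to push INSN to the next 16-byte boundary, out of the
   window holding the three preceding jumps.  */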
17462 /* The AMD Athlon works faster
17463 when RET is not the destination of a conditional jump or directly preceded
17464 by another jump instruction. We avoid the penalty by inserting a NOP just
17465 before such RET instructions. */
17466 static void
17467 ix86_pad_returns (void)
17469 edge e;
17470 edge_iterator ei;
17472 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17474 basic_block bb = e->src;
17475 rtx ret = BB_END (bb);
17476 rtx prev;
17477 bool replace = false;
17479 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17480 || !maybe_hot_bb_p (bb))
17481 continue;
17482 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17483 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17484 break;
17485 if (prev && GET_CODE (prev) == CODE_LABEL)
17487 edge e;
17488 edge_iterator ei;
17490 FOR_EACH_EDGE (e, ei, bb->preds)
17491 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17492 && !(e->flags & EDGE_FALLTHRU))
17493 replace = true;
17495 if (!replace)
17497 prev = prev_active_insn (ret);
17498 if (prev
17499 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17500 || GET_CODE (prev) == CALL_INSN))
17501 replace = true;
17502 /* Empty functions get a branch mispredict even when the jump destination
17503 is not visible to us. */
17504 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17505 replace = true;
17507 if (replace)
17509 emit_insn_before (gen_return_internal_long (), ret);
17510 delete_insn (ret);
17515 /* Implement machine specific optimizations. We implement padding of returns
17516 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
17517 static void
17518 ix86_reorg (void)
17520 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17521 ix86_pad_returns ();
17522 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17523 ix86_avoid_jump_misspredicts ();
17526 /* Return nonzero when a QImode register that must be represented via a REX prefix
17527 is used. */
17528 bool
17529 x86_extended_QIreg_mentioned_p (rtx insn)
17531 int i;
17532 extract_insn_cached (insn);
17533 for (i = 0; i < recog_data.n_operands; i++)
17534 if (REG_P (recog_data.operand[i])
17535 && REGNO (recog_data.operand[i]) >= 4)
17536 return true;
17537 return false;
17540 /* Return nonzero when P points to a register encoded via a REX prefix.
17541 Called via for_each_rtx. */
17542 static int
17543 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17545 unsigned int regno;
17546 if (!REG_P (*p))
17547 return 0;
17548 regno = REGNO (*p);
17549 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17552 /* Return true when INSN mentions a register that must be encoded using a REX
17553 prefix. */
17554 bool
17555 x86_extended_reg_mentioned_p (rtx insn)
17557 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17560 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17561 optabs would emit if we didn't have TFmode patterns. */
17563 void
17564 x86_emit_floatuns (rtx operands[2])
17566 rtx neglab, donelab, i0, i1, f0, in, out;
17567 enum machine_mode mode, inmode;
17569 inmode = GET_MODE (operands[1]);
17570 gcc_assert (inmode == SImode || inmode == DImode);
17572 out = operands[0];
17573 in = force_reg (inmode, operands[1]);
17574 mode = GET_MODE (out);
17575 neglab = gen_label_rtx ();
17576 donelab = gen_label_rtx ();
17577 i1 = gen_reg_rtx (Pmode);
17578 f0 = gen_reg_rtx (mode);
17580 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17582 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17583 emit_jump_insn (gen_jump (donelab));
17584 emit_barrier ();
17586 emit_label (neglab);
17588 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17589 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17590 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
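/* I0 now holds IN / 2 with the discarded low bit ORed back in, so it is
   a non-negative signed value; converting it and doubling the result
   below (f0 + f0) reproduces the original unsigned value with correct
   rounding.  */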
17591 expand_float (f0, i0, 0);
17592 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17594 emit_label (donelab);
17597 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17598 with all elements equal to VAR. Return true if successful. */
17600 static bool
17601 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17602 rtx target, rtx val)
17604 enum machine_mode smode, wsmode, wvmode;
17605 rtx x;
17607 switch (mode)
17609 case V2SImode:
17610 case V2SFmode:
17611 if (!mmx_ok && !TARGET_SSE)
17612 return false;
17613 /* FALLTHRU */
17615 case V2DFmode:
17616 case V2DImode:
17617 case V4SFmode:
17618 case V4SImode:
17619 val = force_reg (GET_MODE_INNER (mode), val);
17620 x = gen_rtx_VEC_DUPLICATE (mode, val);
17621 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17622 return true;
17624 case V4HImode:
17625 if (!mmx_ok)
17626 return false;
17627 if (TARGET_SSE || TARGET_3DNOW_A)
17629 val = gen_lowpart (SImode, val);
17630 x = gen_rtx_TRUNCATE (HImode, val);
17631 x = gen_rtx_VEC_DUPLICATE (mode, x);
17632 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17633 return true;
17635 else
17637 smode = HImode;
17638 wsmode = SImode;
17639 wvmode = V2SImode;
17640 goto widen;
17643 case V8QImode:
17644 if (!mmx_ok)
17645 return false;
17646 smode = QImode;
17647 wsmode = HImode;
17648 wvmode = V4HImode;
17649 goto widen;
17650 case V8HImode:
17651 smode = HImode;
17652 wsmode = SImode;
17653 wvmode = V4SImode;
17654 goto widen;
17655 case V16QImode:
17656 smode = QImode;
17657 wsmode = HImode;
17658 wvmode = V8HImode;
17659 goto widen;
17660 widen:
17661 /* Replicate the value once into the next wider mode and recurse. */
17662 val = convert_modes (wsmode, smode, val, true);
17663 x = expand_simple_binop (wsmode, ASHIFT, val,
17664 GEN_INT (GET_MODE_BITSIZE (smode)),
17665 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17666 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
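/* E.g. a QImode value 0xab becomes the HImode value 0xabab; the
   recursive call broadcasts that in the wider vector mode and the
   result is reinterpreted in MODE.  */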
17668 x = gen_reg_rtx (wvmode);
17669 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17670 gcc_unreachable ();
17671 emit_move_insn (target, gen_lowpart (mode, x));
17672 return true;
17674 default:
17675 return false;
17679 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17680 whose low element is VAR, and other elements are zero. Return true
17681 if successful. */
17683 static bool
17684 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17685 rtx target, rtx var)
17687 enum machine_mode vsimode;
17688 rtx x;
17690 switch (mode)
17692 case V2SFmode:
17693 case V2SImode:
17694 if (!mmx_ok && !TARGET_SSE)
17695 return false;
17696 /* FALLTHRU */
17698 case V2DFmode:
17699 case V2DImode:
17700 var = force_reg (GET_MODE_INNER (mode), var);
17701 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17702 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17703 return true;
17705 case V4SFmode:
17706 case V4SImode:
17707 var = force_reg (GET_MODE_INNER (mode), var);
17708 x = gen_rtx_VEC_DUPLICATE (mode, var);
17709 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17710 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17711 return true;
17713 case V8HImode:
17714 case V16QImode:
17715 vsimode = V4SImode;
17716 goto widen;
17717 case V4HImode:
17718 case V8QImode:
17719 if (!mmx_ok)
17720 return false;
17721 vsimode = V2SImode;
17722 goto widen;
17723 widen:
17724 /* Zero extend the variable element to SImode and recurse. */
17725 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17727 x = gen_reg_rtx (vsimode);
17728 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17729 gcc_unreachable ();
17731 emit_move_insn (target, gen_lowpart (mode, x));
17732 return true;
17734 default:
17735 return false;
17739 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17740 consisting of the values in VALS. It is known that all elements
17741 except ONE_VAR are constants. Return true if successful. */
17743 static bool
17744 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17745 rtx target, rtx vals, int one_var)
17747 rtx var = XVECEXP (vals, 0, one_var);
17748 enum machine_mode wmode;
17749 rtx const_vec, x;
17751 const_vec = copy_rtx (vals);
17752 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17753 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17755 switch (mode)
17757 case V2DFmode:
17758 case V2DImode:
17759 case V2SFmode:
17760 case V2SImode:
17761 /* For the two element vectors, it's just as easy to use
17762 the general case. */
17763 return false;
17765 case V4SFmode:
17766 case V4SImode:
17767 case V8HImode:
17768 case V4HImode:
17769 break;
17771 case V16QImode:
17772 wmode = V8HImode;
17773 goto widen;
17774 case V8QImode:
17775 wmode = V4HImode;
17776 goto widen;
17777 widen:
17778 /* There's no way to set one QImode entry easily. Combine
17779 the variable value with its adjacent constant value, and
17780 promote to an HImode set. */
17781 x = XVECEXP (vals, 0, one_var ^ 1);
17782 if (one_var & 1)
17784 var = convert_modes (HImode, QImode, var, true);
17785 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17786 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17787 x = GEN_INT (INTVAL (x) & 0xff);
17789 else
17791 var = convert_modes (HImode, QImode, var, true);
17792 x = gen_int_mode (INTVAL (x) << 8, HImode);
17794 if (x != const0_rtx)
17795 var = expand_simple_binop (HImode, IOR, var, x, var,
17796 1, OPTAB_LIB_WIDEN);
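/* VAR now holds the variable QImode element packed together with its
   constant neighbour as a single HImode value; e.g. replacing element 5
   of a V16QImode vector becomes a set of HImode element 2 (one_var >> 1)
   in the V8HImode view.  */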
17798 x = gen_reg_rtx (wmode);
17799 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17800 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17802 emit_move_insn (target, gen_lowpart (mode, x));
17803 return true;
17805 default:
17806 return false;
17809 emit_move_insn (target, const_vec);
17810 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17811 return true;
17814 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17815 all values variable, and none identical. */
17817 static void
17818 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17819 rtx target, rtx vals)
17821 enum machine_mode half_mode = GET_MODE_INNER (mode);
17822 rtx op0 = NULL, op1 = NULL;
17823 bool use_vec_concat = false;
17825 switch (mode)
17827 case V2SFmode:
17828 case V2SImode:
17829 if (!mmx_ok && !TARGET_SSE)
17830 break;
17831 /* FALLTHRU */
17833 case V2DFmode:
17834 case V2DImode:
17835 /* For the two element vectors, we always implement VEC_CONCAT. */
17836 op0 = XVECEXP (vals, 0, 0);
17837 op1 = XVECEXP (vals, 0, 1);
17838 use_vec_concat = true;
17839 break;
17841 case V4SFmode:
17842 half_mode = V2SFmode;
17843 goto half;
17844 case V4SImode:
17845 half_mode = V2SImode;
17846 goto half;
17847 half:
17849 rtvec v;
17851 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
17852 Recurse to load the two halves. */
17854 op0 = gen_reg_rtx (half_mode);
17855 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
17856 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
17858 op1 = gen_reg_rtx (half_mode);
17859 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
17860 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
17862 use_vec_concat = true;
17864 break;
17866 case V8HImode:
17867 case V16QImode:
17868 case V4HImode:
17869 case V8QImode:
17870 break;
17872 default:
17873 gcc_unreachable ();
17876 if (use_vec_concat)
17878 if (!register_operand (op0, half_mode))
17879 op0 = force_reg (half_mode, op0);
17880 if (!register_operand (op1, half_mode))
17881 op1 = force_reg (half_mode, op1);
17883 emit_insn (gen_rtx_SET (VOIDmode, target,
17884 gen_rtx_VEC_CONCAT (mode, op0, op1)));
17886 else
17888 int i, j, n_elts, n_words, n_elt_per_word;
17889 enum machine_mode inner_mode;
17890 rtx words[4], shift;
17892 inner_mode = GET_MODE_INNER (mode);
17893 n_elts = GET_MODE_NUNITS (mode);
17894 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
17895 n_elt_per_word = n_elts / n_words;
17896 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
17898 for (i = 0; i < n_words; ++i)
17900 rtx word = NULL_RTX;
17902 for (j = 0; j < n_elt_per_word; ++j)
17904 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
17905 elt = convert_modes (word_mode, inner_mode, elt, true);
17907 if (j == 0)
17908 word = elt;
17909 else
17911 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
17912 word, 1, OPTAB_LIB_WIDEN);
17913 word = expand_simple_binop (word_mode, IOR, word, elt,
17914 word, 1, OPTAB_LIB_WIDEN);
17918 words[i] = word;
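/* Each word is assembled highest element first; the shift/ior steps
   leave the highest-indexed element in the most significant bits, which
   matches the little-endian layout.  E.g. a V8HImode vector on ia32 is
   packed as two HImode elements in each of four SImode words.  */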
17921 if (n_words == 1)
17922 emit_move_insn (target, gen_lowpart (mode, words[0]));
17923 else if (n_words == 2)
17925 rtx tmp = gen_reg_rtx (mode);
17926 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
17927 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
17928 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
17929 emit_move_insn (target, tmp);
17931 else if (n_words == 4)
17933 rtx tmp = gen_reg_rtx (V4SImode);
17934 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
17935 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
17936 emit_move_insn (target, gen_lowpart (mode, tmp));
17938 else
17939 gcc_unreachable ();
17943 /* Initialize vector TARGET via VALS. Suppress the use of MMX
17944 instructions unless MMX_OK is true. */
17946 void
17947 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
17949 enum machine_mode mode = GET_MODE (target);
17950 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17951 int n_elts = GET_MODE_NUNITS (mode);
17952 int n_var = 0, one_var = -1;
17953 bool all_same = true, all_const_zero = true;
17954 int i;
17955 rtx x;
17957 for (i = 0; i < n_elts; ++i)
17959 x = XVECEXP (vals, 0, i);
17960 if (!CONSTANT_P (x))
17961 n_var++, one_var = i;
17962 else if (x != CONST0_RTX (inner_mode))
17963 all_const_zero = false;
17964 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
17965 all_same = false;
17968 /* Constants are best loaded from the constant pool. */
17969 if (n_var == 0)
17971 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
17972 return;
17975 /* If all values are identical, broadcast the value. */
17976 if (all_same
17977 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
17978 XVECEXP (vals, 0, 0)))
17979 return;
17981 /* Values where only one field is non-constant are best loaded from
17982 the pool and overwritten via a move later. */
17983 if (n_var == 1)
17985 if (all_const_zero && one_var == 0
17986 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
17987 XVECEXP (vals, 0, 0)))
17988 return;
17990 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
17991 return;
17994 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
17997 void
17998 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18000 enum machine_mode mode = GET_MODE (target);
18001 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18002 bool use_vec_merge = false;
18003 rtx tmp;
18005 switch (mode)
18007 case V2SFmode:
18008 case V2SImode:
18009 if (mmx_ok)
18011 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18012 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18013 if (elt == 0)
18014 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18015 else
18016 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18017 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18018 return;
18020 break;
18022 case V2DFmode:
18023 case V2DImode:
18025 rtx op0, op1;
18027 /* For the two element vectors, we implement a VEC_CONCAT with
18028 the extraction of the other element. */
18030 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18031 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18033 if (elt == 0)
18034 op0 = val, op1 = tmp;
18035 else
18036 op0 = tmp, op1 = val;
18038 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18039 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18041 return;
18043 case V4SFmode:
18044 switch (elt)
18046 case 0:
18047 use_vec_merge = true;
18048 break;
18050 case 1:
18051 /* tmp = target = A B C D */
18052 tmp = copy_to_reg (target);
18053 /* target = A A B B */
18054 emit_insn (gen_sse_unpcklps (target, target, target));
18055 /* target = X A B B */
18056 ix86_expand_vector_set (false, target, val, 0);
18057 /* target = A X C D */
18058 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18059 GEN_INT (1), GEN_INT (0),
18060 GEN_INT (2+4), GEN_INT (3+4)));
18061 return;
18063 case 2:
18064 /* tmp = target = A B C D */
18065 tmp = copy_to_reg (target);
18066 /* tmp = X B C D */
18067 ix86_expand_vector_set (false, tmp, val, 0);
18068 /* target = A B X D */
18069 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18070 GEN_INT (0), GEN_INT (1),
18071 GEN_INT (0+4), GEN_INT (3+4)));
18072 return;
18074 case 3:
18075 /* tmp = target = A B C D */
18076 tmp = copy_to_reg (target);
18077 /* tmp = X B C D */
18078 ix86_expand_vector_set (false, tmp, val, 0);
18079 /* target = A B C X */
18080 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18081 GEN_INT (0), GEN_INT (1),
18082 GEN_INT (2+4), GEN_INT (0+4)));
18083 return;
18085 default:
18086 gcc_unreachable ();
18088 break;
18090 case V4SImode:
18091 /* Element 0 handled by vec_merge below. */
18092 if (elt == 0)
18094 use_vec_merge = true;
18095 break;
18098 if (TARGET_SSE2)
18100 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18101 store into element 0, then shuffle them back. */
18103 rtx order[4];
18105 order[0] = GEN_INT (elt);
18106 order[1] = const1_rtx;
18107 order[2] = const2_rtx;
18108 order[3] = GEN_INT (3);
18109 order[elt] = const0_rtx;
18111 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18112 order[1], order[2], order[3]));
18114 ix86_expand_vector_set (false, target, val, 0);
18116 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18117 order[1], order[2], order[3]));
18119 else
18121 /* For SSE1, we have to reuse the V4SF code. */
18122 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18123 gen_lowpart (SFmode, val), elt);
18125 return;
18127 case V8HImode:
18128 use_vec_merge = TARGET_SSE2;
18129 break;
18130 case V4HImode:
18131 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18132 break;
18134 case V16QImode:
18135 case V8QImode:
18136 default:
18137 break;
18140 if (use_vec_merge)
18142 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18143 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18144 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18146 else
18148 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18150 emit_move_insn (mem, target);
18152 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18153 emit_move_insn (tmp, val);
18155 emit_move_insn (target, mem);
18159 void
18160 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18162 enum machine_mode mode = GET_MODE (vec);
18163 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18164 bool use_vec_extr = false;
18165 rtx tmp;
18167 switch (mode)
18169 case V2SImode:
18170 case V2SFmode:
18171 if (!mmx_ok)
18172 break;
18173 /* FALLTHRU */
18175 case V2DFmode:
18176 case V2DImode:
18177 use_vec_extr = true;
18178 break;
18180 case V4SFmode:
18181 switch (elt)
18183 case 0:
18184 tmp = vec;
18185 break;
18187 case 1:
18188 case 3:
18189 tmp = gen_reg_rtx (mode);
18190 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18191 GEN_INT (elt), GEN_INT (elt),
18192 GEN_INT (elt+4), GEN_INT (elt+4)));
18193 break;
18195 case 2:
18196 tmp = gen_reg_rtx (mode);
18197 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18198 break;
18200 default:
18201 gcc_unreachable ();
18203 vec = tmp;
18204 use_vec_extr = true;
18205 elt = 0;
18206 break;
18208 case V4SImode:
18209 if (TARGET_SSE2)
18211 switch (elt)
18213 case 0:
18214 tmp = vec;
18215 break;
18217 case 1:
18218 case 3:
18219 tmp = gen_reg_rtx (mode);
18220 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18221 GEN_INT (elt), GEN_INT (elt),
18222 GEN_INT (elt), GEN_INT (elt)));
18223 break;
18225 case 2:
18226 tmp = gen_reg_rtx (mode);
18227 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18228 break;
18230 default:
18231 gcc_unreachable ();
18233 vec = tmp;
18234 use_vec_extr = true;
18235 elt = 0;
18237 else
18239 /* For SSE1, we have to reuse the V4SF code. */
18240 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18241 gen_lowpart (V4SFmode, vec), elt);
18242 return;
18244 break;
18246 case V8HImode:
18247 use_vec_extr = TARGET_SSE2;
18248 break;
18249 case V4HImode:
18250 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18251 break;
18253 case V16QImode:
18254 case V8QImode:
18255 /* ??? Could extract the appropriate HImode element and shift. */
18256 default:
18257 break;
18260 if (use_vec_extr)
18262 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18263 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18265 /* Let the rtl optimizers know about the zero extension performed. */
18266 if (inner_mode == HImode)
18268 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18269 target = gen_lowpart (SImode, target);
18272 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18274 else
18276 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18278 emit_move_insn (mem, vec);
18280 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18281 emit_move_insn (target, tmp);
18285 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18286 pattern to reduce; DEST is the destination; IN is the input vector. */
18288 void
18289 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18291 rtx tmp1, tmp2, tmp3;
18293 tmp1 = gen_reg_rtx (V4SFmode);
18294 tmp2 = gen_reg_rtx (V4SFmode);
18295 tmp3 = gen_reg_rtx (V4SFmode);
18297 emit_insn (gen_sse_movhlps (tmp1, in, in));
18298 emit_insn (fn (tmp2, tmp1, in));
18300 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18301 GEN_INT (1), GEN_INT (1),
18302 GEN_INT (1+4), GEN_INT (1+4)));
18303 emit_insn (fn (dest, tmp2, tmp3));
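/* movhlps folds elements 2 and 3 onto 0 and 1, the first FN combines the
   two pairs, the shuffle broadcasts element 1 of that partial result, and
   the final FN leaves the reduction of all four input elements in
   element 0 of DEST.  */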
18306 /* Implements target hook vector_mode_supported_p. */
18307 static bool
18308 ix86_vector_mode_supported_p (enum machine_mode mode)
18310 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18311 return true;
18312 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18313 return true;
18314 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18315 return true;
18316 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18317 return true;
18318 return false;
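/* In other words, a vector mode is usable whenever the instruction set
   extension that provides it is enabled: SSE, SSE2, MMX or 3DNow!
   respectively, in the order tested above.  */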
18321 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18323 We do this in the new i386 backend to maintain source compatibility
18324 with the old cc0-based compiler. */
18326 static tree
18327 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18328 tree inputs ATTRIBUTE_UNUSED,
18329 tree clobbers)
18331 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18332 clobbers);
18333 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18334 clobbers);
18335 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18336 clobbers);
18337 return clobbers;
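/* The practical effect: every asm statement is treated as if it also
   clobbered the condition codes ("flags"), the x87 status word ("fpsr")
   and the direction flag ("dirflag"), which is what inline asm written
   for the old cc0-based compiler was allowed to assume.  */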
18340 /* Return true if this goes in large data/bss. */
18342 static bool
18343 ix86_in_large_data_p (tree exp)
18345 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18346 return false;
18348 /* Functions are never large data. */
18349 if (TREE_CODE (exp) == FUNCTION_DECL)
18350 return false;
18352 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18354 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18355 if (strcmp (section, ".ldata") == 0
18356 || strcmp (section, ".lbss") == 0)
18357 return true;
18358 return false;
18360 else
18362 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18364 /* If this is an incomplete type with size 0, then we can't put it
18365 in data because it might be too big when completed. */
18366 if (!size || size > ix86_section_threshold)
18367 return true;
18370 return false;
18372 static void
18373 ix86_encode_section_info (tree decl, rtx rtl, int first)
18375 default_encode_section_info (decl, rtl, first);
18377 if (TREE_CODE (decl) == VAR_DECL
18378 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18379 && ix86_in_large_data_p (decl))
18380 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
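/* Taken together, the two functions above arrange that, under the medium
   code models, any object explicitly placed in .ldata/.lbss, or whose
   size is unknown or exceeds ix86_section_threshold, has its SYMBOL_REF
   marked SYMBOL_FLAG_FAR_ADDR, so the rest of the backend can avoid
   assuming it is reachable through a 32-bit displacement.  */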
18383 /* Worker function for REVERSE_CONDITION. */
18385 enum rtx_code
18386 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18388 return (mode != CCFPmode && mode != CCFPUmode
18389 ? reverse_condition (code)
18390 : reverse_condition_maybe_unordered (code));
18393 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18394 to OPERANDS[0]. */
18396 const char *
18397 output_387_reg_move (rtx insn, rtx *operands)
18399 if (REG_P (operands[1])
18400 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18402 if (REGNO (operands[0]) == FIRST_STACK_REG
18403 && TARGET_USE_FFREEP)
18404 return "ffreep\t%y0";
18405 return "fstp\t%y0";
18407 if (STACK_TOP_P (operands[0]))
18408 return "fld%z1\t%y1";
18409 return "fst\t%y0";
18412 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18413 FP status register is set. */
18415 void
18416 ix86_emit_fp_unordered_jump (rtx label)
18418 rtx reg = gen_reg_rtx (HImode);
18419 rtx temp;
18421 emit_insn (gen_x86_fnstsw_1 (reg));
18423 if (TARGET_USE_SAHF)
18425 emit_insn (gen_x86_sahf_1 (reg));
18427 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18428 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18430 else
18432 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18434 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18435 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18438 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18439 gen_rtx_LABEL_REF (VOIDmode, label),
18440 pc_rtx);
18441 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18442 emit_jump_insn (temp);
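/* Two sequences are possible above.  With TARGET_USE_SAHF the status
   word fetched by fnstsw is copied into EFLAGS with sahf, and the branch
   tests the unordered (PF) condition, which sahf loads from the C2 bit.
   Without sahf, the C2 bit (mask 0x04 in the upper byte of the status
   word) is tested directly and the branch is taken when it is set.  */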
18445 /* Output code to perform a log1p XFmode calculation. */
18447 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18449 rtx label1 = gen_label_rtx ();
18450 rtx label2 = gen_label_rtx ();
18452 rtx tmp = gen_reg_rtx (XFmode);
18453 rtx tmp2 = gen_reg_rtx (XFmode);
18455 emit_insn (gen_absxf2 (tmp, op1));
18456 emit_insn (gen_cmpxf (tmp,
18457 CONST_DOUBLE_FROM_REAL_VALUE (
18458 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18459 XFmode)));
18460 emit_jump_insn (gen_bge (label1));
18462 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18463 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18464 emit_jump (label2);
18466 emit_label (label1);
18467 emit_move_insn (tmp, CONST1_RTX (XFmode));
18468 emit_insn (gen_addxf3 (tmp, op1, tmp));
18469 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18470 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18472 emit_label (label2);
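/* Rationale for the constant above: fyl2xp1 is only specified for
   arguments with |x| < 1 - sqrt(2)/2, approximately 0.29289321881, where
   it computes log2(1 + x) without the cancellation that adding 1
   explicitly would cause.  Within that range the sequence yields
   op0 = ln(2) * log2(1 + op1) = log1p(op1); outside it, 1 is added
   explicitly and fyl2x is used instead, which is accurate enough
   there.  */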
18475 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18477 static void
18478 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18479 tree decl)
18481 /* With Binutils 2.15, the "@unwind" marker must be specified on
18482 every occurrence of the ".eh_frame" section, not just the first
18483 one. */
18484 if (TARGET_64BIT
18485 && strcmp (name, ".eh_frame") == 0)
18487 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18488 flags & SECTION_WRITE ? "aw" : "a");
18489 return;
18491 default_elf_asm_named_section (name, flags, decl);
18494 /* Return the mangling of TYPE if it is an extended fundamental type. */
18496 static const char *
18497 ix86_mangle_fundamental_type (tree type)
18499 switch (TYPE_MODE (type))
18501 case TFmode:
18502 /* __float128 is "g". */
18503 return "g";
18504 case XFmode:
18505 /* "long double" or __float80 is "e". */
18506 return "e";
18507 default:
18508 return NULL;
18512 /* For 32-bit code we can save PIC register setup by using the
18513 hidden __stack_chk_fail_local function instead of calling
18514 __stack_chk_fail directly. 64-bit code does not need to set up a PIC
18515 register, so it is better to call __stack_chk_fail directly. */
18517 static tree
18518 ix86_stack_protect_fail (void)
18520 return TARGET_64BIT
18521 ? default_external_stack_protect_fail ()
18522 : default_hidden_stack_protect_fail ();
18525 /* Select a format to encode pointers in exception handling data. CODE
18526 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18527 true if the symbol may be affected by dynamic relocations.
18529 ??? All x86 object file formats are capable of representing this.
18530 After all, the relocation needed is the same as for the call insn.
18531 Whether or not a particular assembler allows us to enter such
18532 encodings remains to be seen. */
18533 int
18534 asm_preferred_eh_data_format (int code, int global)
18536 if (flag_pic)
18538 int type = DW_EH_PE_sdata8;
18539 if (!TARGET_64BIT
18540 || ix86_cmodel == CM_SMALL_PIC
18541 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18542 type = DW_EH_PE_sdata4;
18543 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18545 if (ix86_cmodel == CM_SMALL
18546 || (ix86_cmodel == CM_MEDIUM && code))
18547 return DW_EH_PE_udata4;
18548 return DW_EH_PE_absptr;
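/* In practice this means: PIC code uses pc-relative encodings, sdata4
   whenever a 32-bit offset is known to be sufficient (32-bit targets, or
   the small/medium PIC models as qualified above) and sdata8 otherwise,
   adding an indirection for symbols that dynamic relocations may move;
   the non-PIC small model uses 32-bit absolute values (udata4), as does
   the medium model for code addresses; everything else falls back to
   full-width absolute pointers.  */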
18551 #include "gt-i386.h"