1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
#endif
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
   : 4)
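/* Illustrative note (not part of the original source): MODE_INDEX is meant
   to be used as a subscript into the per-mode slots of the cost tables
   defined below, roughly along the lines of

       ix86_cost->mult_init[MODE_INDEX (mode)]
       ix86_cost->divide[MODE_INDEX (mode)]

   The field names above are assumptions inferred from the initializers that
   follow; index 4 ("other") is the fallback for modes not listed in the
   macro.  */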
65 /* Processor costs (relative to an add) */
66 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
67 #define COSTS_N_BYTES(N) ((N) * 2)
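/* Worked example (illustrative): with COSTS_N_INSNS (N) == (N) * 4 and a
   2-byte add, COSTS_N_BYTES expresses instruction sizes on the same scale
   as cycle counts, so a 3-byte instruction costs COSTS_N_BYTES (3) == 6,
   i.e. 1.5 times the cost of an add when tuning for size.  */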
70 struct processor_costs size_cost = {  /* costs for tuning for size */
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
90 2, /* cost for loading QImode using movzbl */
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {2, 2, 2}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {2, 2, 2}, /* cost of loading integer registers */
99 3, /* cost of moving MMX register */
100 {3, 3}, /* cost of loading MMX registers
101 in SImode and DImode */
102 {3, 3}, /* cost of storing MMX registers
103 in SImode and DImode */
104 3, /* cost of moving SSE register */
105 {3, 3, 3}, /* cost of loading SSE registers
106 in SImode, DImode and TImode */
107 {3, 3, 3}, /* cost of storing SSE registers
108 in SImode, DImode and TImode */
109 3, /* MMX or SSE register to integer */
110 0, /* size of prefetch block */
111 0, /* number of parallel prefetches */
113 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
114 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
115 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
116 COSTS_N_BYTES (2), /* cost of FABS instruction. */
117 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
118 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 /* Processor costs (relative to an add) */
123 struct processor_costs i386_cost = {  /* 386 specific costs */
124 COSTS_N_INSNS (1), /* cost of an add instruction */
125 COSTS_N_INSNS (1), /* cost of a lea instruction */
126 COSTS_N_INSNS (3), /* variable shift costs */
127 COSTS_N_INSNS (2), /* constant shift costs */
128 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
129 COSTS_N_INSNS (6), /* HI */
130 COSTS_N_INSNS (6), /* SI */
131 COSTS_N_INSNS (6), /* DI */
132 COSTS_N_INSNS (6)}, /* other */
133 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
134 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
135 COSTS_N_INSNS (23), /* HI */
136 COSTS_N_INSNS (23), /* SI */
137 COSTS_N_INSNS (23), /* DI */
138 COSTS_N_INSNS (23)}, /* other */
139 COSTS_N_INSNS (3), /* cost of movsx */
140 COSTS_N_INSNS (2), /* cost of movzx */
141 15, /* "large" insn */
143 4, /* cost for loading QImode using movzbl */
144 {2, 4, 2}, /* cost of loading integer registers
145 in QImode, HImode and SImode.
146 Relative to reg-reg move (2). */
147 {2, 4, 2}, /* cost of storing integer registers */
148 2, /* cost of reg,reg fld/fst */
149 {8, 8, 8}, /* cost of loading fp registers
150 in SFmode, DFmode and XFmode */
151 {8, 8, 8}, /* cost of loading integer registers */
152 2, /* cost of moving MMX register */
153 {4, 8}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {4, 8}, /* cost of storing MMX registers
156 in SImode and DImode */
157 2, /* cost of moving SSE register */
158 {4, 8, 16}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {4, 8, 16}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of prefetch block */
164 0, /* number of parallel prefetches */
166 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
167 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
168 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
169 COSTS_N_INSNS (22), /* cost of FABS instruction. */
170 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
171 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 struct processor_costs i486_cost = {  /* 486 specific costs */
176 COSTS_N_INSNS (1), /* cost of an add instruction */
177 COSTS_N_INSNS (1), /* cost of a lea instruction */
178 COSTS_N_INSNS (3), /* variable shift costs */
179 COSTS_N_INSNS (2), /* constant shift costs */
180 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
181 COSTS_N_INSNS (12), /* HI */
182 COSTS_N_INSNS (12), /* SI */
183 COSTS_N_INSNS (12), /* DI */
184 COSTS_N_INSNS (12)}, /* other */
185 1, /* cost of multiply per each bit set */
186 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
187 COSTS_N_INSNS (40), /* HI */
188 COSTS_N_INSNS (40), /* SI */
189 COSTS_N_INSNS (40), /* DI */
190 COSTS_N_INSNS (40)}, /* other */
191 COSTS_N_INSNS (3), /* cost of movsx */
192 COSTS_N_INSNS (2), /* cost of movzx */
193 15, /* "large" insn */
195 4, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {8, 8, 8}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {8, 8, 8}, /* cost of loading integer registers */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
218 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
219 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
220 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
221 COSTS_N_INSNS (3), /* cost of FABS instruction. */
222 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
223 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
227 struct processor_costs pentium_cost = {
228 COSTS_N_INSNS (1), /* cost of an add instruction */
229 COSTS_N_INSNS (1), /* cost of a lea instruction */
230 COSTS_N_INSNS (4), /* variable shift costs */
231 COSTS_N_INSNS (1), /* constant shift costs */
232 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (11), /* HI */
234 COSTS_N_INSNS (11), /* SI */
235 COSTS_N_INSNS (11), /* DI */
236 COSTS_N_INSNS (11)}, /* other */
237 0, /* cost of multiply per each bit set */
238 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
239 COSTS_N_INSNS (25), /* HI */
240 COSTS_N_INSNS (25), /* SI */
241 COSTS_N_INSNS (25), /* DI */
242 COSTS_N_INSNS (25)}, /* other */
243 COSTS_N_INSNS (3), /* cost of movsx */
244 COSTS_N_INSNS (2), /* cost of movzx */
245 8, /* "large" insn */
247 6, /* cost for loading QImode using movzbl */
248 {2, 4, 2}, /* cost of loading integer registers
249 in QImode, HImode and SImode.
250 Relative to reg-reg move (2). */
251 {2, 4, 2}, /* cost of storing integer registers */
252 2, /* cost of reg,reg fld/fst */
253 {2, 2, 6}, /* cost of loading fp registers
254 in SFmode, DFmode and XFmode */
255 {4, 4, 6}, /* cost of loading integer registers */
256 8, /* cost of moving MMX register */
257 {8, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {8, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of prefetch block */
268 0, /* number of parallel prefetches */
270 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
271 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
272 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
273 COSTS_N_INSNS (1), /* cost of FABS instruction. */
274 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
275 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
279 struct processor_costs pentiumpro_cost = {
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (1), /* variable shift costs */
283 COSTS_N_INSNS (1), /* constant shift costs */
284 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (4), /* HI */
286 COSTS_N_INSNS (4), /* SI */
287 COSTS_N_INSNS (4), /* DI */
288 COSTS_N_INSNS (4)}, /* other */
289 0, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (17), /* HI */
292 COSTS_N_INSNS (17), /* SI */
293 COSTS_N_INSNS (17), /* DI */
294 COSTS_N_INSNS (17)}, /* other */
295 COSTS_N_INSNS (1), /* cost of movsx */
296 COSTS_N_INSNS (1), /* cost of movzx */
297 8, /* "large" insn */
299 2, /* cost for loading QImode using movzbl */
300 {4, 4, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 2, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {2, 2, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 6}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 3, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 6, /* number of parallel prefetches */
322 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
323 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
324 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
327 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
331 struct processor_costs k6_cost = {
332 COSTS_N_INSNS (1), /* cost of an add instruction */
333 COSTS_N_INSNS (2), /* cost of a lea instruction */
334 COSTS_N_INSNS (1), /* variable shift costs */
335 COSTS_N_INSNS (1), /* constant shift costs */
336 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
337 COSTS_N_INSNS (3), /* HI */
338 COSTS_N_INSNS (3), /* SI */
339 COSTS_N_INSNS (3), /* DI */
340 COSTS_N_INSNS (3)}, /* other */
341 0, /* cost of multiply per each bit set */
342 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
343 COSTS_N_INSNS (18), /* HI */
344 COSTS_N_INSNS (18), /* SI */
345 COSTS_N_INSNS (18), /* DI */
346 COSTS_N_INSNS (18)}, /* other */
347 COSTS_N_INSNS (2), /* cost of movsx */
348 COSTS_N_INSNS (2), /* cost of movzx */
349 8, /* "large" insn */
351 3, /* cost for loading QImode using movzbl */
352 {4, 5, 4}, /* cost of loading integer registers
353 in QImode, HImode and SImode.
354 Relative to reg-reg move (2). */
355 {2, 3, 2}, /* cost of storing integer registers */
356 4, /* cost of reg,reg fld/fst */
357 {6, 6, 6}, /* cost of loading fp registers
358 in SFmode, DFmode and XFmode */
359 {4, 4, 4}, /* cost of loading integer registers */
360 2, /* cost of moving MMX register */
361 {2, 2}, /* cost of loading MMX registers
362 in SImode and DImode */
363 {2, 2}, /* cost of storing MMX registers
364 in SImode and DImode */
365 2, /* cost of moving SSE register */
366 {2, 2, 8}, /* cost of loading SSE registers
367 in SImode, DImode and TImode */
368 {2, 2, 8}, /* cost of storing SSE registers
369 in SImode, DImode and TImode */
370 6, /* MMX or SSE register to integer */
371 32, /* size of prefetch block */
372 1, /* number of parallel prefetches */
374 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
375 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
376 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
377 COSTS_N_INSNS (2), /* cost of FABS instruction. */
378 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
379 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
383 struct processor_costs athlon_cost = {
384 COSTS_N_INSNS (1), /* cost of an add instruction */
385 COSTS_N_INSNS (2), /* cost of a lea instruction */
386 COSTS_N_INSNS (1), /* variable shift costs */
387 COSTS_N_INSNS (1), /* constant shift costs */
388 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
389 COSTS_N_INSNS (5), /* HI */
390 COSTS_N_INSNS (5), /* SI */
391 COSTS_N_INSNS (5), /* DI */
392 COSTS_N_INSNS (5)}, /* other */
393 0, /* cost of multiply per each bit set */
394 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
395 COSTS_N_INSNS (26), /* HI */
396 COSTS_N_INSNS (42), /* SI */
397 COSTS_N_INSNS (74), /* DI */
398 COSTS_N_INSNS (74)}, /* other */
399 COSTS_N_INSNS (1), /* cost of movsx */
400 COSTS_N_INSNS (1), /* cost of movzx */
401 8, /* "large" insn */
403 4, /* cost for loading QImode using movzbl */
404 {3, 4, 3}, /* cost of loading integer registers
405 in QImode, HImode and SImode.
406 Relative to reg-reg move (2). */
407 {3, 4, 3}, /* cost of storing integer registers */
408 4, /* cost of reg,reg fld/fst */
409 {4, 4, 12}, /* cost of loading fp registers
410 in SFmode, DFmode and XFmode */
411 {6, 6, 8}, /* cost of loading integer registers */
412 2, /* cost of moving MMX register */
413 {4, 4}, /* cost of loading MMX registers
414 in SImode and DImode */
415 {4, 4}, /* cost of storing MMX registers
416 in SImode and DImode */
417 2, /* cost of moving SSE register */
418 {4, 4, 6}, /* cost of loading SSE registers
419 in SImode, DImode and TImode */
420 {4, 4, 5}, /* cost of storing SSE registers
421 in SImode, DImode and TImode */
422 5, /* MMX or SSE register to integer */
423 64, /* size of prefetch block */
424 6, /* number of parallel prefetches */
426 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (2), /* cost of FABS instruction. */
430 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
435 struct processor_costs k8_cost = {
436 COSTS_N_INSNS (1), /* cost of an add instruction */
437 COSTS_N_INSNS (2), /* cost of a lea instruction */
438 COSTS_N_INSNS (1), /* variable shift costs */
439 COSTS_N_INSNS (1), /* constant shift costs */
440 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
441 COSTS_N_INSNS (4), /* HI */
442 COSTS_N_INSNS (3), /* SI */
443 COSTS_N_INSNS (4), /* DI */
444 COSTS_N_INSNS (5)}, /* other */
445 0, /* cost of multiply per each bit set */
446 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
447 COSTS_N_INSNS (26), /* HI */
448 COSTS_N_INSNS (42), /* SI */
449 COSTS_N_INSNS (74), /* DI */
450 COSTS_N_INSNS (74)}, /* other */
451 COSTS_N_INSNS (1), /* cost of movsx */
452 COSTS_N_INSNS (1), /* cost of movzx */
453 8, /* "large" insn */
455 4, /* cost for loading QImode using movzbl */
456 {3, 4, 3}, /* cost of loading integer registers
457 in QImode, HImode and SImode.
458 Relative to reg-reg move (2). */
459 {3, 4, 3}, /* cost of storing integer registers */
460 4, /* cost of reg,reg fld/fst */
461 {4, 4, 12}, /* cost of loading fp registers
462 in SFmode, DFmode and XFmode */
463 {6, 6, 8}, /* cost of loading integer registers */
464 2, /* cost of moving MMX register */
465 {3, 3}, /* cost of loading MMX registers
466 in SImode and DImode */
467 {4, 4}, /* cost of storing MMX registers
468 in SImode and DImode */
469 2, /* cost of moving SSE register */
470 {4, 3, 6}, /* cost of loading SSE registers
471 in SImode, DImode and TImode */
472 {4, 4, 5}, /* cost of storing SSE registers
473 in SImode, DImode and TImode */
474 5, /* MMX or SSE register to integer */
475 64, /* size of prefetch block */
476 6, /* number of parallel prefetches */
478 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
479 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
480 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
481 COSTS_N_INSNS (2), /* cost of FABS instruction. */
482 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
483 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
487 struct processor_costs pentium4_cost = {
488 COSTS_N_INSNS (1), /* cost of an add instruction */
489 COSTS_N_INSNS (3), /* cost of a lea instruction */
490 COSTS_N_INSNS (4), /* variable shift costs */
491 COSTS_N_INSNS (4), /* constant shift costs */
492 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
493 COSTS_N_INSNS (15), /* HI */
494 COSTS_N_INSNS (15), /* SI */
495 COSTS_N_INSNS (15), /* DI */
496 COSTS_N_INSNS (15)}, /* other */
497 0, /* cost of multiply per each bit set */
498 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
499 COSTS_N_INSNS (56), /* HI */
500 COSTS_N_INSNS (56), /* SI */
501 COSTS_N_INSNS (56), /* DI */
502 COSTS_N_INSNS (56)}, /* other */
503 COSTS_N_INSNS (1), /* cost of movsx */
504 COSTS_N_INSNS (1), /* cost of movzx */
505 16, /* "large" insn */
507 2, /* cost for loading QImode using movzbl */
508 {4, 5, 4}, /* cost of loading integer registers
509 in QImode, HImode and SImode.
510 Relative to reg-reg move (2). */
511 {2, 3, 2}, /* cost of storing integer registers */
512 2, /* cost of reg,reg fld/fst */
513 {2, 2, 6}, /* cost of loading fp registers
514 in SFmode, DFmode and XFmode */
515 {4, 4, 6}, /* cost of loading integer registers */
516 2, /* cost of moving MMX register */
517 {2, 2}, /* cost of loading MMX registers
518 in SImode and DImode */
519 {2, 2}, /* cost of storing MMX registers
520 in SImode and DImode */
521 12, /* cost of moving SSE register */
522 {12, 12, 12}, /* cost of loading SSE registers
523 in SImode, DImode and TImode */
524 {2, 2, 8}, /* cost of storing SSE registers
525 in SImode, DImode and TImode */
526 10, /* MMX or SSE register to integer */
527 64, /* size of prefetch block */
528 6, /* number of parallel prefetches */
530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
539 struct processor_costs nocona_cost = {
540 COSTS_N_INSNS (1), /* cost of an add instruction */
541 COSTS_N_INSNS (1), /* cost of a lea instruction */
542 COSTS_N_INSNS (1), /* variable shift costs */
543 COSTS_N_INSNS (1), /* constant shift costs */
544 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
545 COSTS_N_INSNS (10), /* HI */
546 COSTS_N_INSNS (10), /* SI */
547 COSTS_N_INSNS (10), /* DI */
548 COSTS_N_INSNS (10)}, /* other */
549 0, /* cost of multiply per each bit set */
550 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
551 COSTS_N_INSNS (66), /* HI */
552 COSTS_N_INSNS (66), /* SI */
553 COSTS_N_INSNS (66), /* DI */
554 COSTS_N_INSNS (66)}, /* other */
555 COSTS_N_INSNS (1), /* cost of movsx */
556 COSTS_N_INSNS (1), /* cost of movzx */
557 16, /* "large" insn */
559 4, /* cost for loading QImode using movzbl */
560 {4, 4, 4}, /* cost of loading integer registers
561 in QImode, HImode and SImode.
562 Relative to reg-reg move (2). */
563 {4, 4, 4}, /* cost of storing integer registers */
564 3, /* cost of reg,reg fld/fst */
565 {12, 12, 12}, /* cost of loading fp registers
566 in SFmode, DFmode and XFmode */
567 {4, 4, 4}, /* cost of loading integer registers */
568 6, /* cost of moving MMX register */
569 {12, 12}, /* cost of loading MMX registers
570 in SImode and DImode */
571 {12, 12}, /* cost of storing MMX registers
572 in SImode and DImode */
573 6, /* cost of moving SSE register */
574 {12, 12, 12}, /* cost of loading SSE registers
575 in SImode, DImode and TImode */
576 {12, 12, 12}, /* cost of storing SSE registers
577 in SImode, DImode and TImode */
578 8, /* MMX or SSE register to integer */
579 128, /* size of prefetch block */
580 8, /* number of parallel prefetches */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
590 /* Generic64 should produce code tuned for Nocona and K8. */
592 struct processor_costs generic64_cost = {
593 COSTS_N_INSNS (1), /* cost of an add instruction */
594 /* On all chips taken into consideration lea is 2 cycles or more.  With
595 this cost, however, our current implementation of synth_mult results in
596 use of unnecessary temporary registers causing regression on several
597 SPECfp benchmarks.  */
598 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (4), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (4), /* DI */
605 COSTS_N_INSNS (2)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (26), /* HI */
609 COSTS_N_INSNS (42), /* SI */
610 COSTS_N_INSNS (74), /* DI */
611 COSTS_N_INSNS (74)}, /* other */
612 COSTS_N_INSNS (1), /* cost of movsx */
613 COSTS_N_INSNS (1), /* cost of movzx */
614 8, /* "large" insn */
616 4, /* cost for loading QImode using movzbl */
617 {4, 4, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {4, 4, 4}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {12, 12, 12}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {6, 6, 8}, /* cost of loading integer registers */
625 2, /* cost of moving MMX register */
626 {8, 8}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {8, 8}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {8, 8, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {8, 8, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
638 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
639 is increased to a perhaps more appropriate value of 5.  */
641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
649 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
651 struct processor_costs generic32_cost = {
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
654 COSTS_N_INSNS (1), /* variable shift costs */
655 COSTS_N_INSNS (1), /* constant shift costs */
656 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (4), /* HI */
658 COSTS_N_INSNS (3), /* SI */
659 COSTS_N_INSNS (4), /* DI */
660 COSTS_N_INSNS (2)}, /* other */
661 0, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (26), /* HI */
664 COSTS_N_INSNS (42), /* SI */
665 COSTS_N_INSNS (74), /* DI */
666 COSTS_N_INSNS (74)}, /* other */
667 COSTS_N_INSNS (1), /* cost of movsx */
668 COSTS_N_INSNS (1), /* cost of movzx */
669 8, /* "large" insn */
671 4, /* cost for loading QImode using movzbl */
672 {4, 4, 4}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {4, 4, 4}, /* cost of storing integer registers */
676 4, /* cost of reg,reg fld/fst */
677 {12, 12, 12}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {6, 6, 8}, /* cost of loading integer registers */
680 2, /* cost of moving MMX register */
681 {8, 8}, /* cost of loading MMX registers
682 in SImode and DImode */
683 {8, 8}, /* cost of storing MMX registers
684 in SImode and DImode */
685 2, /* cost of moving SSE register */
686 {8, 8, 8}, /* cost of loading SSE registers
687 in SImode, DImode and TImode */
688 {8, 8, 8}, /* cost of storing SSE registers
689 in SImode, DImode and TImode */
690 5, /* MMX or SSE register to integer */
691 64, /* size of prefetch block */
692 6, /* number of parallel prefetches */
694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
702 const struct processor_costs *ix86_cost = &pentium_cost;
704 /* Processor feature/optimization bitmasks. */
705 #define m_386 (1<<PROCESSOR_I386)
706 #define m_486 (1<<PROCESSOR_I486)
707 #define m_PENT (1<<PROCESSOR_PENTIUM)
708 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
709 #define m_K6 (1<<PROCESSOR_K6)
710 #define m_ATHLON (1<<PROCESSOR_ATHLON)
711 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
712 #define m_K8 (1<<PROCESSOR_K8)
713 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
714 #define m_NOCONA (1<<PROCESSOR_NOCONA)
715 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
716 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
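/* Illustrative note (not part of the original source): each x86_* tuning
   variable below is a bitmask over the m_* processor bits above and is
   normally tested against the bit of the active tuning target, e.g. via a
   target macro shaped roughly like

       #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_tune))

   The exact macro names live in i386.h; the line above is only a sketch of
   the idiom.  */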
719 /* Generic instruction choice should be the common subset of supported CPUs
720 (PPro/PENT4/NOCONA/Athlon/K8). */
722 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
723 Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
724 generic because it does not work well with PPro based chips.  */
725 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
726 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
727 const int x86_zero_extend_with_and = m_486 | m_PENT;
728 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
729 const int x86_double_with_add = ~m_386;
730 const int x86_use_bit_test = m_386;
731 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
732 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
733 const int x86_fisttp = m_NOCONA;
734 const int x86_3dnow_a = m_ATHLON_K8;
735 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
736 /* Branch hints were put in P4 based on simulation result. But
737 after P4 was made, no performance benefit was observed with
738 branch hints.  It also increases the code size.  As a result,
739 icc never generates branch hints.  */
740 const int x86_branch_hints = 0;
741 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
742 /* We probably ought to watch for partial register stalls on Generic32
743 compilation setting as well.  However, in the current implementation the
744 partial register stalls are not eliminated very well - they can
745 be introduced via subregs synthesized by combine and can happen
746 in caller/callee saving sequences.
747 Because this option pays back little on PPro based chips and is in conflict
748 with partial reg. dependencies used by Athlon/P4 based chips, it is better
749 to leave it off for generic32 for now.  */
750 const int x86_partial_reg_stall = m_PPRO;
751 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
752 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
753 const int x86_use_mov0 = m_K6;
754 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
755 const int x86_read_modify_write = ~m_PENT;
756 const int x86_read_modify = ~(m_PENT | m_PPRO);
757 const int x86_split_long_moves = m_PPRO;
758 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
759 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
760 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
761 const int x86_qimode_math = ~(0);
762 const int x86_promote_qi_regs = 0;
763 /* On PPro this flag is meant to avoid partial register stalls. Just like
764 the x86_partial_reg_stall this option might be considered for Generic32
765 if our scheme for avoiding partial stalls was more effective. */
766 const int x86_himode_math = ~(m_PPRO);
767 const int x86_promote_hi_regs = m_PPRO;
768 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
769 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
770 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
771 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
772 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
773 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
774 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
775 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
776 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
777 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
778 const int x86_shift1 = ~m_486;
779 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
780 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
781 that treat 128bit SSE registers as single units versus K8 based chips that
782 divide SSE registers into two 64bit halves.
783 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
784 to allow register renaming on 128bit SSE units, but usually results in one
785 extra microop on 64bit SSE units.  Experimental results show that disabling
786 this option on P4 brings over 20% SPECfp regression, while enabling it on
787 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling of moves.  */
789 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 /* Set for machines where the type and dependencies are resolved on SSE
791 register parts instead of whole registers, so we may maintain just
792 lower part of scalar values in proper format leaving the upper part undefined.  */
794 const int x86_sse_split_regs = m_ATHLON_K8;
795 const int x86_sse_typeless_stores = m_ATHLON_K8;
796 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
797 const int x86_use_ffreep = m_ATHLON_K8;
798 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
799 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
801 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
802 integer data in xmm registers, which results in pretty abysmal code.  */
803 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
805 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
806 /* Some CPU cores are not able to predict more than 4 branch instructions in
807 the 16 byte window. */
808 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
809 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
810 const int x86_use_bt = m_ATHLON_K8;
811 /* Compare and exchange was added for 80486.  */
812 const int x86_cmpxchg = ~m_386;
813 /* Compare and exchange 8 bytes was added for pentium.  */
814 const int x86_cmpxchg8b = ~(m_386 | m_486);
815 /* Compare and exchange 16 bytes was added for nocona.  */
816 const int x86_cmpxchg16b = m_NOCONA;
817 /* Exchange and add was added for 80486.  */
818 const int x86_xadd = ~m_386;
819 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
821 /* In case the average insn count for single function invocation is
822 lower than this constant, emit fast (but longer) prologue and epilogue code.  */
824 #define FAST_PROLOGUE_INSN_COUNT 20
826 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
827 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
828 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
829 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
831 /* Array of the smallest class containing reg number REGNO, indexed by
832 REGNO. Used by REGNO_REG_CLASS in i386.h. */
834 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
837 AREG, DREG, CREG, BREG,
839 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
841 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
842 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
845 /* flags, fpsr, dirflag, frame */
846 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
847 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
849 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
851 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
852 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
853 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
857 /* The "default" register map used in 32bit mode. */
859 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
861 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
862 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
863 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
864 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
865 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
866 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
867 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
870 static int const x86_64_int_parameter_registers[6] =
872 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
873 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
876 static int const x86_64_int_return_registers[4] =
878 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
881 /* The "default" register map used in 64bit mode. */
882 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
884 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
885 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
886 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
887 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
888 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
889 8,9,10,11,12,13,14,15, /* extended integer registers */
890 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
893 /* Define the register numbers to be used in Dwarf debugging information.
894 The SVR4 reference port C compiler uses the following register numbers
895 in its Dwarf output code:
896 0 for %eax (gcc regno = 0)
897 1 for %ecx (gcc regno = 2)
898 2 for %edx (gcc regno = 1)
899 3 for %ebx (gcc regno = 3)
900 4 for %esp (gcc regno = 7)
901 5 for %ebp (gcc regno = 6)
902 6 for %esi (gcc regno = 4)
903 7 for %edi (gcc regno = 5)
904 The following three DWARF register numbers are never generated by
905 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
906 believes these numbers have these meanings.
907 8 for %eip (no gcc equivalent)
908 9 for %eflags (gcc regno = 17)
909 10 for %trapno (no gcc equivalent)
910 It is not at all clear how we should number the FP stack registers
911 for the x86 architecture. If the version of SDB on x86/svr4 were
912 a bit less brain dead with respect to floating-point then we would
913 have a precedent to follow with respect to DWARF register numbers
914 for x86 FP registers, but the SDB on x86/svr4 is so completely
915 broken with respect to FP registers that it is hardly worth thinking
916 of it as something to strive for compatibility with.
917 The version of x86/svr4 SDB I have at the moment does (partially)
918 seem to believe that DWARF register number 11 is associated with
919 the x86 register %st(0), but that's about all. Higher DWARF
920 register numbers don't seem to be associated with anything in
921 particular, and even for DWARF regno 11, SDB only seems to under-
922 stand that it should say that a variable lives in %st(0) (when
923 asked via an `=' command) if we said it was in DWARF regno 11,
924 but SDB still prints garbage when asked for the value of the
925 variable in question (via a `/' command).
926 (Also note that the labels SDB prints for various FP stack regs
927 when doing an `x' command are all wrong.)
928 Note that these problems generally don't affect the native SVR4
929 C compiler because it doesn't allow the use of -O with -g and
930 because when it is *not* optimizing, it allocates a memory
931 location for each floating-point variable, and the memory
932 location is what gets described in the DWARF AT_location
933 attribute for the variable in question.
934 Regardless of the severe mental illness of the x86/svr4 SDB, we
935 do something sensible here and we use the following DWARF
936 register numbers.  Note that these are all stack-top-relative numbers:
938 11 for %st(0) (gcc regno = 8)
939 12 for %st(1) (gcc regno = 9)
940 13 for %st(2) (gcc regno = 10)
941 14 for %st(3) (gcc regno = 11)
942 15 for %st(4) (gcc regno = 12)
943 16 for %st(5) (gcc regno = 13)
944 17 for %st(6) (gcc regno = 14)
945 18 for %st(7) (gcc regno = 15)  */
947 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
949 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
950 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
951 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
952 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
953 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
954 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
955 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
958 /* Test and compare insns in i386.md store the information needed to
959 generate branch and scc insns here. */
961 rtx ix86_compare_op0 = NULL_RTX;
962 rtx ix86_compare_op1 = NULL_RTX;
963 rtx ix86_compare_emitted = NULL_RTX;
965 /* Size of the register save area. */
966 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
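/* Worked example (illustrative): under the x86-64 ABI, REGPARM_MAX is 6
   integer argument registers, UNITS_PER_WORD is 8 bytes and SSE_REGPARM_MAX
   is 8, so the varargs register save area is 6*8 + 8*16 = 176 bytes: six
   8-byte slots for the integer registers followed by eight 16-byte slots
   for the SSE registers.  */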
968 /* Define the structure for the machine field in struct function. */
970 struct stack_local_entry GTY(())
975 struct stack_local_entry *next;
978 /* Structure describing stack frame layout.
979 Stack grows downward:
985 saved frame pointer if frame_pointer_needed
986 <- HARD_FRAME_POINTER
992 > to_allocate <- FRAME_POINTER
1002 HOST_WIDE_INT frame;
1004 int outgoing_arguments_size;
1007 HOST_WIDE_INT to_allocate;
1008 /* The offsets relative to ARG_POINTER.  */
1009 HOST_WIDE_INT frame_pointer_offset;
1010 HOST_WIDE_INT hard_frame_pointer_offset;
1011 HOST_WIDE_INT stack_pointer_offset;
1013 /* When save_regs_using_mov is set, emit prologue using
1014 move instead of push instructions.  */
1015 bool save_regs_using_mov;
1018 /* Code model option. */
1019 enum cmodel ix86_cmodel;
1021 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1023 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1025 /* Which unit we are generating floating point math for.  */
1026 enum fpmath_unit ix86_fpmath;
1028 /* Which cpu are we scheduling for.  */
1029 enum processor_type ix86_tune;
1030 /* Which instruction set architecture to use.  */
1031 enum processor_type ix86_arch;
1033 /* true if sse prefetch instruction is not NOOP.  */
1034 int x86_prefetch_sse;
1036 /* ix86_regparm_string as a number */
1037 static int ix86_regparm;
1039 /* Preferred alignment for stack boundary in bits.  */
1040 unsigned int ix86_preferred_stack_boundary;
1042 /* Values 1-5: see jump.c */
1043 int ix86_branch_cost;
1045 /* Variables which are this size or smaller are put in the data/bss
1046 or ldata/lbss sections.  */
1048 int ix86_section_threshold = 65536;
1050 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1051 char internal_label_prefix[16];
1052 int internal_label_prefix_len;
1054 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping.  */
1055 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1057 static bool ix86_handle_option (size_t, const char *, int);
1058 static void output_pic_addr_const (FILE *, rtx, int);
1059 static void put_condition_code (enum rtx_code, enum machine_mode,
1061 static const char *get_some_local_dynamic_name (void);
1062 static int get_some_local_dynamic_name_1 (rtx *, void *);
1063 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1064 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1066 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1067 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1069 static rtx get_thread_pointer (int);
1070 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1071 static void get_pc_thunk_name (char [32], unsigned int);
1072 static rtx gen_push (rtx);
1073 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1074 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1075 static struct machine_function * ix86_init_machine_status (void);
1076 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1077 static int ix86_nsaved_regs (void);
1078 static void ix86_emit_save_regs (void);
1079 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1080 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1081 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1082 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1083 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1084 static rtx ix86_expand_aligntest (rtx, int);
1085 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1086 static int ix86_issue_rate (void);
1087 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1088 static int ia32_multipass_dfa_lookahead (void);
1089 static void ix86_init_mmx_sse_builtins (void);
1090 static void ix86_init_sse_abi_builtins (void);
1091 static rtx x86_this_parameter (tree);
1092 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1093 HOST_WIDE_INT, tree);
1094 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1095 static void x86_file_start (void);
1096 static void ix86_reorg (void);
1097 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx *);
1098 static tree ix86_build_builtin_va_list (void);
1099 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1101 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1102 static bool ix86_vector_mode_supported_p (enum machine_mode);
1104 static int ix86_address_cost (rtx);
1105 static bool ix86_cannot_force_const_mem (rtx);
1106 static rtx ix86_delegitimize_address (rtx);
1108 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1110 struct builtin_description;
1111 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1113 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1115 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1116 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1117 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1118 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1119 static rtx safe_vector_operand (rtx, enum machine_mode);
1120 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1121 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1122 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1123 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1124 static int ix86_fp_comparison_cost (enum rtx_code code);
1125 static unsigned int ix86_select_alt_pic_regnum (void);
1126 static int ix86_save_reg (unsigned int, int);
1127 static void ix86_compute_frame_layout (struct ix86_frame *);
1128 static int ix86_comp_type_attributes (tree, tree);
1129 static int ix86_function_regparm (tree, tree);
1130 const struct attribute_spec ix86_attribute_table[];
1131 static bool ix86_function_ok_for_sibcall (tree, tree);
1132 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1133 static int ix86_value_regno (enum machine_mode, tree, tree);
1134 static bool contains_128bit_aligned_vector_p (tree);
1135 static rtx ix86_struct_value_rtx (tree, int);
1136 static bool ix86_ms_bitfield_layout_p (tree);
1137 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1138 static int extended_reg_mentioned_1 (rtx *, void *);
1139 static bool ix86_rtx_costs (rtx, int, int, int *);
1140 static int min_insn_size (rtx);
1141 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1142 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1143 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1145 static void ix86_init_builtins (void);
1146 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1147 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1148 static const char *ix86_mangle_fundamental_type (tree);
1149 static tree ix86_stack_protect_fail (void);
1150 static rtx ix86_internal_arg_pointer (void);
1151 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1153 /* This function is only used on Solaris. */
1154 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1157 /* Register class used for passing given 64bit part of the argument.
1158 These represent classes as documented by the PS ABI, with the exception
1159 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1160 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1162 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1163 whenever possible (upper half does contain padding).
1165 enum x86_64_reg_class
1168 X86_64_INTEGER_CLASS,
1169 X86_64_INTEGERSI_CLASS,
1176 X86_64_COMPLEX_X87_CLASS,
1179 static const char * const x86_64_reg_class_name[] = {
1180 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1181 "sseup", "x87", "x87up", "cplx87", "no"
1184 #define MAX_CLASSES 4
1186 /* Table of constants used by fldpi, fldln2, etc.... */
1187 static REAL_VALUE_TYPE ext_80387_constants_table[5];
1188 static bool ext_80387_constants_init = 0;
1189 static void init_ext_80387_constants (void);
1190 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1191 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1192 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1193 static section *x86_64_elf_select_section (tree decl, int reloc,
1194 unsigned HOST_WIDE_INT align)
1197 /* Initialize the GCC target structure. */
1198 #undef TARGET_ATTRIBUTE_TABLE
1199 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1200 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1201 # undef TARGET_MERGE_DECL_ATTRIBUTES
1202 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1205 #undef TARGET_COMP_TYPE_ATTRIBUTES
1206 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1208 #undef TARGET_INIT_BUILTINS
1209 #define TARGET_INIT_BUILTINS ix86_init_builtins
1210 #undef TARGET_EXPAND_BUILTIN
1211 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1212 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1213 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1215 #undef TARGET_ASM_FUNCTION_EPILOGUE
1216 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1218 #undef TARGET_ENCODE_SECTION_INFO
1219 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1220 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1222 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1225 #undef TARGET_ASM_OPEN_PAREN
1226 #define TARGET_ASM_OPEN_PAREN ""
1227 #undef TARGET_ASM_CLOSE_PAREN
1228 #define TARGET_ASM_CLOSE_PAREN ""
1230 #undef TARGET_ASM_ALIGNED_HI_OP
1231 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1232 #undef TARGET_ASM_ALIGNED_SI_OP
1233 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1235 #undef TARGET_ASM_ALIGNED_DI_OP
1236 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1239 #undef TARGET_ASM_UNALIGNED_HI_OP
1240 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1241 #undef TARGET_ASM_UNALIGNED_SI_OP
1242 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1243 #undef TARGET_ASM_UNALIGNED_DI_OP
1244 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1246 #undef TARGET_SCHED_ADJUST_COST
1247 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1248 #undef TARGET_SCHED_ISSUE_RATE
1249 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1250 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1251 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1252 ia32_multipass_dfa_lookahead
1254 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1255 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1258 #undef TARGET_HAVE_TLS
1259 #define TARGET_HAVE_TLS true
1261 #undef TARGET_CANNOT_FORCE_CONST_MEM
1262 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1264 #undef TARGET_DELEGITIMIZE_ADDRESS
1265 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1267 #undef TARGET_MS_BITFIELD_LAYOUT_P
1268 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1271 #undef TARGET_BINDS_LOCAL_P
1272 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1275 #undef TARGET_ASM_OUTPUT_MI_THUNK
1276 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1277 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1278 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1280 #undef TARGET_ASM_FILE_START
1281 #define TARGET_ASM_FILE_START x86_file_start
1283 #undef TARGET_DEFAULT_TARGET_FLAGS
1284 #define TARGET_DEFAULT_TARGET_FLAGS \
1286 | TARGET_64BIT_DEFAULT \
1287 | TARGET_SUBTARGET_DEFAULT \
1288 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1290 #undef TARGET_HANDLE_OPTION
1291 #define TARGET_HANDLE_OPTION ix86_handle_option
1293 #undef TARGET_RTX_COSTS
1294 #define TARGET_RTX_COSTS ix86_rtx_costs
1295 #undef TARGET_ADDRESS_COST
1296 #define TARGET_ADDRESS_COST ix86_address_cost
1298 #undef TARGET_FIXED_CONDITION_CODE_REGS
1299 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1300 #undef TARGET_CC_MODES_COMPATIBLE
1301 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1303 #undef TARGET_MACHINE_DEPENDENT_REORG
1304 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1306 #undef TARGET_BUILD_BUILTIN_VA_LIST
1307 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1309 #undef TARGET_MD_ASM_CLOBBERS
1310 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1312 #undef TARGET_PROMOTE_PROTOTYPES
1313 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1314 #undef TARGET_STRUCT_VALUE_RTX
1315 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1316 #undef TARGET_SETUP_INCOMING_VARARGS
1317 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1318 #undef TARGET_MUST_PASS_IN_STACK
1319 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1320 #undef TARGET_PASS_BY_REFERENCE
1321 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1322 #undef TARGET_INTERNAL_ARG_POINTER
1323 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1324 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1325 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1327 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1328 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1330 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1331 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1334 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1335 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1338 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1339 #undef TARGET_INSERT_ATTRIBUTES
1340 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1343 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1344 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1346 #undef TARGET_STACK_PROTECT_FAIL
1347 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1349 #undef TARGET_FUNCTION_VALUE
1350 #define TARGET_FUNCTION_VALUE ix86_function_value
1352 struct gcc_target targetm = TARGET_INITIALIZER;
1355 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
1357 #ifndef DEFAULT_PCC_STRUCT_RETURN
1358 #define DEFAULT_PCC_STRUCT_RETURN 1
1361 /* Implement TARGET_HANDLE_OPTION. */
1364 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1371 target_flags &= ~MASK_3DNOW_A;
1372 target_flags_explicit |= MASK_3DNOW_A;
1379 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1380 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1387 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1388 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1395 target_flags &= ~MASK_SSE3;
1396 target_flags_explicit |= MASK_SSE3;
1405 /* Sometimes certain combinations of command options do not make
1406 sense on a particular target machine. You can define a macro
1407 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1408 defined, is executed once just after all the command options have been parsed.
1411 Don't use this macro to turn on various extra optimizations for
1412 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1415 override_options (void)
1418 int ix86_tune_defaulted = 0;
1420 /* Comes from final.c -- no real reason to change it. */
1421 #define MAX_CODE_ALIGN 16
1425 const struct processor_costs *cost;  /* Processor costs */
1426 const int target_enable;             /* Target flags to enable.  */
1427 const int target_disable;            /* Target flags to disable.  */
1428 const int align_loop;                /* Default alignments.  */
1429 const int align_loop_max_skip;
1430 const int align_jump;
1431 const int align_jump_max_skip;
1432 const int align_func;
1434 const processor_target_table[PROCESSOR_max] =
1436 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1437 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1438 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1439 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1440 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1441 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1442 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1443 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1444 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1445 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1446 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
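  /* Illustrative note (not part of the original source): override_options
     is expected to index this table by the selected tuning target to pick
     the active cost table and alignment defaults, roughly:

         ix86_cost = processor_target_table[ix86_tune].cost;
         align_functions = processor_target_table[ix86_tune].align_func;

     The exact assignments are a sketch; the real code appears later in
     this function.  */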
1449 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1452 const char *const name;  /* processor name or nickname.  */
1453 const enum processor_type processor;
1454 const enum pta_flags
1460 PTA_PREFETCH_SSE = 16,
1466 const processor_alias_table
[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE
					  | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
					| PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				   | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				| PTA_64BIT | PTA_3DNOW_A | PTA_SSE
				| PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
				 | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				  | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
				  | PTA_SSE2},
      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
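/* Reading the table: an entry such as "k8" says that "-march=k8" selects
   PROCESSOR_K8 and, via its PTA_* flags, implies MMX, 3DNow!, 3DNow!A,
   SSE and SSE2 (plus x86-64 capability).  The loop further below only
   turns a flag on when the user did not set the corresponding -m option
   explicitly (see the target_flags_explicit tests).  */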
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (!strcmp (ix86_tune_string, "x86-64"))
    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
	     "-mtune=generic instead as appropriate.");

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for -mtune= switch");
  if (!strncmp (ix86_arch_string, "generic", 7))
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (!strcmp (ix86_cmodel_string, "medium"))
	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
	  && !strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
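/* Example: "-malign-functions=4" (with -falign-functions unset) yields
   align_functions = 1 << 4 = 16, i.e. a 16-byte alignment.  The old
   -malign-* options take the logarithm of the alignment, not the
   alignment itself.  */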
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
				   ? 128 : 32);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
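/* Example: "-mpreferred-stack-boundary=4" gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the same 16-byte
   stack alignment used by default for SSE __m128 values.  */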
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = ix86_cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
	error ("-mlarge-data-threshold=%d is negative", i);
      else
	ix86_section_threshold = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU2;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);
    }
  else
    {
      /* i386 ABI does not specify red zone.  It still makes sense to use it
	 when the programmer takes care to keep the stack from being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("-msseregparm used without SSE enabled");

  /* Accept -msselibm only if at least SSE support is enabled.  */
  if (TARGET_SSELIBM
      && ! TARGET_SSE2)
    error ("-msselibm used without SSE2 enabled");

  /* Ignore -msselibm on 64bit targets.  */
  if (TARGET_SSELIBM
      && TARGET_64BIT)
    error ("-msselibm used on a 64bit target");

  ix86_fpmath = TARGET_FPMATH_DEFAULT;
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or -maccumulate-outgoing-args for correctness");
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc, flag_pic))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
2047 /* Build up a unique section name, expressed as a
2048 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2049 RELOC indicates whether the initial value of EXP requires
2050 link-time relocations. */
2053 x86_64_elf_unique_section (tree decl
, int reloc
)
2055 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2056 && ix86_in_large_data_p (decl
))
2058 const char *prefix
= NULL
;
2059 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2060 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2062 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2065 case SECCAT_DATA_REL
:
2066 case SECCAT_DATA_REL_LOCAL
:
2067 case SECCAT_DATA_REL_RO
:
2068 case SECCAT_DATA_REL_RO_LOCAL
:
2069 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2072 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2075 case SECCAT_RODATA_MERGE_STR
:
2076 case SECCAT_RODATA_MERGE_STR_INIT
:
2077 case SECCAT_RODATA_MERGE_CONST
:
2078 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2080 case SECCAT_SRODATA
:
2087 /* We don't split these for medium model. Place them into
2088 default sections and hope for best. */
2096 plen
= strlen (prefix
);
2098 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2099 name
= targetm
.strip_name_encoding (name
);
2100 nlen
= strlen (name
);
2102 string
= alloca (nlen
+ plen
+ 1);
2103 memcpy (string
, prefix
, plen
);
2104 memcpy (string
+ plen
, name
, nlen
+ 1);
2106 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2110 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on the TARGET_64BIT
     that is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
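/* These are the attributes users attach to declarations, e.g.

     int  __attribute__((regparm(3))) callee (int a, int b, int c);
     void __attribute__((fastcall))   handler (int arg);

   Both examples are routed to ix86_handle_cconv_attribute below, which
   validates the regparm argument and rejects incompatible combinations
   such as fastcall + regparm.  */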
2218 /* Decide whether we can make a sibling call to a function. DECL is the
2219 declaration of the function being targeted by the call and EXP is the
2220 CALL_EXPR representing the call. */
2223 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2228 /* If we are generating position-independent code, we cannot sibcall
2229 optimize any indirect call, or a direct call to a global function,
2230 as the PLT requires %ebx be live. */
2231 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2238 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2239 if (POINTER_TYPE_P (func
))
2240 func
= TREE_TYPE (func
);
2243 /* Check that the return value locations are the same. Like
2244 if we are returning floats on the 80387 register stack, we cannot
2245 make a sibcall from a function that doesn't return a float to a
2246 function that does or, conversely, from a function that does return
2247 a float to a function that doesn't; the necessary stack adjustment
2248 would not be executed. This is also the place we notice
2249 differences in the return value ABI. Note that it is ok for one
2250 of the functions to have void return type as long as the return
2251 value of the other is passed in a register. */
2252 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2253 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2255 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2257 if (!rtx_equal_p (a
, b
))
2260 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2262 else if (!rtx_equal_p (a
, b
))
2265 /* If this call is indirect, we'll need to be able to use a call-clobbered
2266 register for the address of the target function. Make sure that all
2267 such registers are not used for passing parameters. */
2268 if (!decl
&& !TARGET_64BIT
)
2272 /* We're looking at the CALL_EXPR, we need the type of the function. */
2273 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2274 type
= TREE_TYPE (type
); /* pointer type */
2275 type
= TREE_TYPE (type
); /* function type */
2277 if (ix86_function_regparm (type
, NULL
) >= 3)
2279 /* ??? Need to count the actual number of registers to be used,
2280 not the possible number of registers. Fix later. */
2285 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2286 /* Dllimport'd functions are also called indirectly. */
2287 if (decl
&& DECL_DLLIMPORT_P (decl
)
2288 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2292 /* If we forced aligned the stack, then sibcalling would unalign the
2293 stack, which may break the called function. */
2294 if (cfun
->machine
->force_align_arg_pointer
)
2297 /* Otherwise okay. That also includes certain types of indirect calls. */
2301 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2302 calling convention attributes;
2303 arguments as in struct attribute_spec.handler. */
2306 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2308 int flags ATTRIBUTE_UNUSED
,
2311 if (TREE_CODE (*node
) != FUNCTION_TYPE
2312 && TREE_CODE (*node
) != METHOD_TYPE
2313 && TREE_CODE (*node
) != FIELD_DECL
2314 && TREE_CODE (*node
) != TYPE_DECL
)
2316 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2317 IDENTIFIER_POINTER (name
));
2318 *no_add_attrs
= true;
2322 /* Can combine regparm with all attributes but fastcall. */
2323 if (is_attribute_p ("regparm", name
))
2327 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2329 error ("fastcall and regparm attributes are not compatible");
2332 cst
= TREE_VALUE (args
);
2333 if (TREE_CODE (cst
) != INTEGER_CST
)
2335 warning (OPT_Wattributes
,
2336 "%qs attribute requires an integer constant argument",
2337 IDENTIFIER_POINTER (name
));
2338 *no_add_attrs
= true;
2340 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2342 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2343 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2344 *no_add_attrs
= true;
2352 warning (OPT_Wattributes
, "%qs attribute ignored",
2353 IDENTIFIER_POINTER (name
));
2354 *no_add_attrs
= true;
2358 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2359 if (is_attribute_p ("fastcall", name
))
2361 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2363 error ("fastcall and cdecl attributes are not compatible");
2365 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2367 error ("fastcall and stdcall attributes are not compatible");
2369 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2371 error ("fastcall and regparm attributes are not compatible");
2375 /* Can combine stdcall with fastcall (redundant), regparm and
2377 else if (is_attribute_p ("stdcall", name
))
2379 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2381 error ("stdcall and cdecl attributes are not compatible");
2383 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2385 error ("stdcall and fastcall attributes are not compatible");
2389 /* Can combine cdecl with regparm and sseregparm. */
2390 else if (is_attribute_p ("cdecl", name
))
2392 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2394 error ("stdcall and cdecl attributes are not compatible");
2396 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2398 error ("fastcall and cdecl attributes are not compatible");
2402 /* Can combine sseregparm with all attributes. */
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
	  != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}
2440 /* Return the regparm value for a function with the indicated TYPE and DECL.
2441 DECL may be NULL when calling function indirectly
2442 or considering a libcall. */
2445 ix86_function_regparm (tree type
, tree decl
)
2448 int regparm
= ix86_regparm
;
2449 bool user_convention
= false;
2453 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2456 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2457 user_convention
= true;
2460 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2463 user_convention
= true;
2466 /* Use register calling convention for local functions when possible. */
2467 if (!TARGET_64BIT
&& !user_convention
&& decl
2468 && flag_unit_at_a_time
&& !profile_flag
)
2470 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2473 int local_regparm
, globals
= 0, regno
;
2475 /* Make sure no regparm register is taken by a global register
2477 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2478 if (global_regs
[local_regparm
])
2480 /* We can't use regparm(3) for nested functions as these use
2481 static chain pointer in third argument. */
2482 if (local_regparm
== 3
2483 && decl_function_context (decl
)
2484 && !DECL_NO_STATIC_CHAIN (decl
))
2486 /* Each global register variable increases register preassure,
2487 so the more global reg vars there are, the smaller regparm
2488 optimization use, unless requested by the user explicitly. */
2489 for (regno
= 0; regno
< 6; regno
++)
2490 if (global_regs
[regno
])
2493 = globals
< local_regparm
? local_regparm
- globals
: 0;
2495 if (local_regparm
> regparm
)
2496 regparm
= local_regparm
;
2503 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2504 in SSE registers for a function with the indicated TYPE and DECL.
2505 DECL may be NULL when calling function indirectly
2506 or considering a libcall. Otherwise return 0. */
2509 ix86_function_sseregparm (tree type
, tree decl
)
2511 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2512 by the sseregparm attribute. */
2513 if (TARGET_SSEREGPARM
2515 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2520 error ("Calling %qD with attribute sseregparm without "
2521 "SSE/SSE2 enabled", decl
);
2523 error ("Calling %qT with attribute sseregparm without "
2524 "SSE/SSE2 enabled", type
);
2531 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2532 in SSE registers even for 32-bit mode and not just 3, but up to
2533 8 SSE arguments in registers. */
2534 if (!TARGET_64BIT
&& decl
2535 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2537 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2539 return TARGET_SSE2
? 2 : 1;
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
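/* Example: a 32-bit function declared

     void __attribute__((stdcall)) f (int, int);

   has a fixed argument list, so the code above returns SIZE (8 bytes
   here) and the callee pops its own arguments (roughly a "ret $8"),
   whereas a plain cdecl function returns 0 and leaves the popping to
   the caller.  */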
2613 /* Argument support functions. */
2615 /* Return true when register may be used to pass function parameters. */
2617 ix86_function_arg_regno_p (int regno
)
2621 return (regno
< REGPARM_MAX
2622 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2623 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2624 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2625 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2627 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2628 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2630 /* RAX is used as hidden argument to va_arg functions. */
2633 for (i
= 0; i
< REGPARM_MAX
; i
++)
2634 if (regno
== x86_64_int_parameter_registers
[i
])
2639 /* Return if we do not know how to pass TYPE solely in registers. */
2642 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2644 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2647 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2648 The layout_type routine is crafty and tries to trick us into passing
2649 currently unsupported vector types on the stack by using TImode. */
2650 return (!TARGET_64BIT
&& mode
== TImode
2651 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2654 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2655 for a call to a function whose data type is FNTYPE.
2656 For a library call, FNTYPE is 0. */
2659 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2660 tree fntype
, /* tree ptr for function decl */
2661 rtx libname
, /* SYMBOL_REF of library name or 0 */
2664 static CUMULATIVE_ARGS zero_cum
;
2665 tree param
, next_param
;
2667 if (TARGET_DEBUG_ARG
)
2669 fprintf (stderr
, "\ninit_cumulative_args (");
2671 fprintf (stderr
, "fntype code = %s, ret code = %s",
2672 tree_code_name
[(int) TREE_CODE (fntype
)],
2673 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2675 fprintf (stderr
, "no fntype");
2678 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2683 /* Set up the number of registers to use for passing arguments. */
2684 cum
->nregs
= ix86_regparm
;
2686 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2688 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2689 cum
->warn_sse
= true;
2690 cum
->warn_mmx
= true;
2691 cum
->maybe_vaarg
= false;
2693 /* Use ecx and edx registers if function has fastcall attribute,
2694 else look for regparm information. */
2695 if (fntype
&& !TARGET_64BIT
)
2697 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2703 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2706 /* Set up the number of SSE registers used for passing SFmode
2707 and DFmode arguments. Warn for mismatching ABI. */
2708 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
2710 /* Determine if this function has variable arguments. This is
2711 indicated by the last argument being 'void_type_mode' if there
2712 are no variable arguments. If there are variable arguments, then
2713 we won't pass anything in registers in 32-bit mode. */
2715 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2717 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2718 param
!= 0; param
= next_param
)
2720 next_param
= TREE_CHAIN (param
);
2721 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2731 cum
->float_in_sse
= 0;
2733 cum
->maybe_vaarg
= true;
2737 if ((!fntype
&& !libname
)
2738 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2739 cum
->maybe_vaarg
= true;
2741 if (TARGET_DEBUG_ARG
)
2742 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2747 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2748 But in the case of vector types, it is some vector mode.
2750 When we have only some of our vector isa extensions enabled, then there
2751 are some modes for which vector_mode_supported_p is false. For these
2752 modes, the generic vector support in gcc will choose some non-vector mode
2753 in order to implement the type. By computing the natural mode, we'll
2754 select the proper ABI location for the operand and not depend on whatever
2755 the middle-end decides to do with these vector types. */
2757 static enum machine_mode
2758 type_natural_mode (tree type
)
2760 enum machine_mode mode
= TYPE_MODE (type
);
2762 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
2764 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2765 if ((size
== 8 || size
== 16)
2766 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2767 && TYPE_VECTOR_SUBPARTS (type
) > 1)
2769 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2771 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
2772 mode
= MIN_MODE_VECTOR_FLOAT
;
2774 mode
= MIN_MODE_VECTOR_INT
;
2776 /* Get the mode which has this inner mode and number of units. */
2777 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
2778 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
2779 && GET_MODE_INNER (mode
) == innermode
)
2789 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2790 this may not agree with the mode that the type system has chosen for the
2791 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2792 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2795 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
2800 if (orig_mode
!= BLKmode
)
2801 tmp
= gen_rtx_REG (orig_mode
, regno
);
2804 tmp
= gen_rtx_REG (mode
, regno
);
2805 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2806 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
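/* Worked example of the merge rules: in struct { double d; int i; } the
   first eightbyte classifies as SSEDF and the second as INTEGERSI, so
   the struct is passed in one SSE register and one integer register.
   If a union overlaid a long double (X87/X87UP) with an int in the same
   eightbyte, rule #5 would merge that pair to MEMORY and the whole
   object would go on the stack.  */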
2859 /* Classify the argument of type TYPE and mode MODE.
2860 CLASSES will be filled by the register class used to pass each word
2861 of the operand. The number of words is returned. In case the parameter
2862 should be passed in memory, 0 is returned. As a special case for zero
2863 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2865 BIT_OFFSET is used internally for handling records and specifies offset
2866 of the offset in bits modulo 256 to avoid overflow cases.
2868 See the x86-64 PS ABI for details.
2872 classify_argument (enum machine_mode mode
, tree type
,
2873 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2875 HOST_WIDE_INT bytes
=
2876 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2877 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2879 /* Variable sized entities are always passed/returned in memory. */
2883 if (mode
!= VOIDmode
2884 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2887 if (type
&& AGGREGATE_TYPE_P (type
))
2891 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2893 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2897 for (i
= 0; i
< words
; i
++)
2898 classes
[i
] = X86_64_NO_CLASS
;
2900 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2901 signalize memory class, so handle it as special case. */
2904 classes
[0] = X86_64_NO_CLASS
;
2908 /* Classify each field of record and merge classes. */
2909 switch (TREE_CODE (type
))
2912 /* For classes first merge in the field of the subclasses. */
2913 if (TYPE_BINFO (type
))
2915 tree binfo
, base_binfo
;
2918 for (binfo
= TYPE_BINFO (type
), basenum
= 0;
2919 BINFO_BASE_ITERATE (binfo
, basenum
, base_binfo
); basenum
++)
2922 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2923 tree type
= BINFO_TYPE (base_binfo
);
2925 num
= classify_argument (TYPE_MODE (type
),
2927 (offset
+ bit_offset
) % 256);
2930 for (i
= 0; i
< num
; i
++)
2932 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2934 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2938 /* And now merge the fields of structure. */
2939 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2941 if (TREE_CODE (field
) == FIELD_DECL
)
2945 /* Bitfields are always classified as integer. Handle them
2946 early, since later code would consider them to be
2947 misaligned integers. */
2948 if (DECL_BIT_FIELD (field
))
2950 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2951 i
< ((int_bit_position (field
) + (bit_offset
% 64))
2952 + tree_low_cst (DECL_SIZE (field
), 0)
2955 merge_classes (X86_64_INTEGER_CLASS
,
2960 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2961 TREE_TYPE (field
), subclasses
,
2962 (int_bit_position (field
)
2963 + bit_offset
) % 256);
2966 for (i
= 0; i
< num
; i
++)
2969 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2971 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2979 /* Arrays are handled as small records. */
2982 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2983 TREE_TYPE (type
), subclasses
, bit_offset
);
2987 /* The partial classes are now full classes. */
2988 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2989 subclasses
[0] = X86_64_SSE_CLASS
;
2990 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2991 subclasses
[0] = X86_64_INTEGER_CLASS
;
2993 for (i
= 0; i
< words
; i
++)
2994 classes
[i
] = subclasses
[i
% num
];
2999 case QUAL_UNION_TYPE
:
3000 /* Unions are similar to RECORD_TYPE but offset is always 0.
3003 /* Unions are not derived. */
3004 gcc_assert (!TYPE_BINFO (type
)
3005 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type
)));
3006 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3008 if (TREE_CODE (field
) == FIELD_DECL
)
3011 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3012 TREE_TYPE (field
), subclasses
,
3016 for (i
= 0; i
< num
; i
++)
3017 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3026 /* Final merger cleanup. */
3027 for (i
= 0; i
< words
; i
++)
3029 /* If one class is MEMORY, everything should be passed in
3031 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3034 /* The X86_64_SSEUP_CLASS should be always preceded by
3035 X86_64_SSE_CLASS. */
3036 if (classes
[i
] == X86_64_SSEUP_CLASS
3037 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3038 classes
[i
] = X86_64_SSE_CLASS
;
3040 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3041 if (classes
[i
] == X86_64_X87UP_CLASS
3042 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3043 classes
[i
] = X86_64_SSE_CLASS
;
3048 /* Compute alignment needed. We align all types to natural boundaries with
3049 exception of XFmode that is aligned to 64bits. */
3050 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3052 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3055 mode_alignment
= 128;
3056 else if (mode
== XCmode
)
3057 mode_alignment
= 256;
3058 if (COMPLEX_MODE_P (mode
))
3059 mode_alignment
/= 2;
3060 /* Misaligned fields are always returned in memory. */
3061 if (bit_offset
% mode_alignment
)
3065 /* for V1xx modes, just use the base mode */
3066 if (VECTOR_MODE_P (mode
)
3067 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3068 mode
= GET_MODE_INNER (mode
);
3070 /* Classification of atomic types. */
3080 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3081 classes
[0] = X86_64_INTEGERSI_CLASS
;
3083 classes
[0] = X86_64_INTEGER_CLASS
;
3087 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3092 if (!(bit_offset
% 64))
3093 classes
[0] = X86_64_SSESF_CLASS
;
3095 classes
[0] = X86_64_SSE_CLASS
;
3098 classes
[0] = X86_64_SSEDF_CLASS
;
3101 classes
[0] = X86_64_X87_CLASS
;
3102 classes
[1] = X86_64_X87UP_CLASS
;
3105 classes
[0] = X86_64_SSE_CLASS
;
3106 classes
[1] = X86_64_SSEUP_CLASS
;
3109 classes
[0] = X86_64_SSE_CLASS
;
3112 classes
[0] = X86_64_SSEDF_CLASS
;
3113 classes
[1] = X86_64_SSEDF_CLASS
;
3116 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3119 /* This modes is larger than 16 bytes. */
3127 classes
[0] = X86_64_SSE_CLASS
;
3128 classes
[1] = X86_64_SSEUP_CLASS
;
3134 classes
[0] = X86_64_SSE_CLASS
;
3140 gcc_assert (VECTOR_MODE_P (mode
));
3145 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3147 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3148 classes
[0] = X86_64_INTEGERSI_CLASS
;
3150 classes
[0] = X86_64_INTEGER_CLASS
;
3151 classes
[1] = X86_64_INTEGER_CLASS
;
3152 return 1 + (bytes
> 8);
3156 /* Examine the argument and return set number of register required in each
3157 class. Return 0 iff parameter should be passed in memory. */
3159 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3160 int *int_nregs
, int *sse_nregs
)
3162 enum x86_64_reg_class
class[MAX_CLASSES
];
3163 int n
= classify_argument (mode
, type
, class, 0);
3169 for (n
--; n
>= 0; n
--)
3172 case X86_64_INTEGER_CLASS
:
3173 case X86_64_INTEGERSI_CLASS
:
3176 case X86_64_SSE_CLASS
:
3177 case X86_64_SSESF_CLASS
:
3178 case X86_64_SSEDF_CLASS
:
3181 case X86_64_NO_CLASS
:
3182 case X86_64_SSEUP_CLASS
:
3184 case X86_64_X87_CLASS
:
3185 case X86_64_X87UP_CLASS
:
3189 case X86_64_COMPLEX_X87_CLASS
:
3190 return in_return
? 2 : 0;
3191 case X86_64_MEMORY_CLASS
:
3197 /* Construct container for the argument used by GCC interface. See
3198 FUNCTION_ARG for the detailed description. */
3201 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3202 tree type
, int in_return
, int nintregs
, int nsseregs
,
3203 const int *intreg
, int sse_regno
)
3205 enum machine_mode tmpmode
;
3207 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3208 enum x86_64_reg_class
class[MAX_CLASSES
];
3212 int needed_sseregs
, needed_intregs
;
3213 rtx exp
[MAX_CLASSES
];
3216 n
= classify_argument (mode
, type
, class, 0);
3217 if (TARGET_DEBUG_ARG
)
3220 fprintf (stderr
, "Memory class\n");
3223 fprintf (stderr
, "Classes:");
3224 for (i
= 0; i
< n
; i
++)
3226 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3228 fprintf (stderr
, "\n");
3233 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3236 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3239 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3240 some less clueful developer tries to use floating-point anyway. */
3241 if (needed_sseregs
&& !TARGET_SSE
)
3243 static bool issued_error
;
3246 issued_error
= true;
3248 error ("SSE register return with SSE disabled");
3250 error ("SSE register argument with SSE disabled");
3255 /* First construct simple cases. Avoid SCmode, since we want to use
3256 single register to pass this type. */
3257 if (n
== 1 && mode
!= SCmode
)
3260 case X86_64_INTEGER_CLASS
:
3261 case X86_64_INTEGERSI_CLASS
:
3262 return gen_rtx_REG (mode
, intreg
[0]);
3263 case X86_64_SSE_CLASS
:
3264 case X86_64_SSESF_CLASS
:
3265 case X86_64_SSEDF_CLASS
:
3266 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3267 case X86_64_X87_CLASS
:
3268 case X86_64_COMPLEX_X87_CLASS
:
3269 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3270 case X86_64_NO_CLASS
:
3271 /* Zero sized array, struct or class. */
3276 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3278 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3280 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3281 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3282 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3283 && class[1] == X86_64_INTEGER_CLASS
3284 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3285 && intreg
[0] + 1 == intreg
[1])
3286 return gen_rtx_REG (mode
, intreg
[0]);
3288 /* Otherwise figure out the entries of the PARALLEL. */
3289 for (i
= 0; i
< n
; i
++)
3293 case X86_64_NO_CLASS
:
3295 case X86_64_INTEGER_CLASS
:
3296 case X86_64_INTEGERSI_CLASS
:
3297 /* Merge TImodes on aligned occasions here too. */
3298 if (i
* 8 + 8 > bytes
)
3299 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3300 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3304 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3305 if (tmpmode
== BLKmode
)
3307 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3308 gen_rtx_REG (tmpmode
, *intreg
),
3312 case X86_64_SSESF_CLASS
:
3313 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3314 gen_rtx_REG (SFmode
,
3315 SSE_REGNO (sse_regno
)),
3319 case X86_64_SSEDF_CLASS
:
3320 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3321 gen_rtx_REG (DFmode
,
3322 SSE_REGNO (sse_regno
)),
3326 case X86_64_SSE_CLASS
:
3327 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3331 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3332 gen_rtx_REG (tmpmode
,
3333 SSE_REGNO (sse_regno
)),
3335 if (tmpmode
== TImode
)
3344 /* Empty aligned struct, union or class. */
3348 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3349 for (i
= 0; i
< nexps
; i
++)
3350 XVECEXP (ret
, 0, i
) = exp
[i
];
3354 /* Update the data in CUM to advance over an argument
3355 of mode MODE and data type TYPE.
3356 (TYPE is null for libcalls where that information may not be available.) */
3359 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3360 tree type
, int named
)
3363 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3364 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3367 mode
= type_natural_mode (type
);
3369 if (TARGET_DEBUG_ARG
)
3370 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3371 "mode=%s, named=%d)\n\n",
3372 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3373 GET_MODE_NAME (mode
), named
);
3377 int int_nregs
, sse_nregs
;
3378 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3379 cum
->words
+= words
;
3380 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3382 cum
->nregs
-= int_nregs
;
3383 cum
->sse_nregs
-= sse_nregs
;
3384 cum
->regno
+= int_nregs
;
3385 cum
->sse_regno
+= sse_nregs
;
3388 cum
->words
+= words
;
3406 cum
->words
+= words
;
3407 cum
->nregs
-= words
;
3408 cum
->regno
+= words
;
3410 if (cum
->nregs
<= 0)
3418 if (cum
->float_in_sse
< 2)
3421 if (cum
->float_in_sse
< 1)
3432 if (!type
|| !AGGREGATE_TYPE_P (type
))
3434 cum
->sse_words
+= words
;
3435 cum
->sse_nregs
-= 1;
3436 cum
->sse_regno
+= 1;
3437 if (cum
->sse_nregs
<= 0)
3449 if (!type
|| !AGGREGATE_TYPE_P (type
))
3451 cum
->mmx_words
+= words
;
3452 cum
->mmx_nregs
-= 1;
3453 cum
->mmx_regno
+= 1;
3454 if (cum
->mmx_nregs
<= 0)
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
  enum machine_mode mode = orig_mode;
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
  if (mode == VOIDmode)
      return GEN_INT (cum->maybe_vaarg
		      ? (cum->sse_nregs < 0

      ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
				 &x86_64_int_parameter_registers[cum->regno],

      /* For now, pass fp/complex values on the stack.  */

      if (words <= cum->nregs)
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX.  */
	      if (mode == BLKmode || mode == DImode)
	      /* ECX not EAX is the first allocated register.  */
	  ret = gen_rtx_REG (mode, regno);

      if (cum->float_in_sse < 2)
      if (cum->float_in_sse < 1)

      if (!type || !AGGREGATE_TYPE_P (type))
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      warning (0, "SSE vector argument without SSE enabled "
	  ret = gen_reg_or_parallel (mode, orig_mode,
				     cum->sse_regno + FIRST_SSE_REG);

      if (!type || !AGGREGATE_TYPE_P (type))
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      warning (0, "MMX vector argument without MMX enabled "
	  ret = gen_reg_or_parallel (mode, orig_mode,
				     cum->mmx_regno + FIRST_MMX_REG);

  if (TARGET_DEBUG_ARG)
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

	print_simple_rtl (stderr, ret);
	fprintf (stderr, ", stack");
      fprintf (stderr, " )\n");
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
      if (type && int_size_in_bytes (type) == -1)
	  if (TARGET_DEBUG_ARG)
	    fprintf (stderr, "function_arg_pass_by_reference\n");
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */

contains_128bit_aligned_vector_p (tree type)
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
  if (TYPE_ALIGN (type) < 128)

  if (AGGREGATE_TYPE_P (type))
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	case QUAL_UNION_TYPE:
	    if (TYPE_BINFO (type))
		tree binfo, base_binfo;

		for (binfo = TYPE_BINFO (type), i = 0;
		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		  if (contains_128bit_aligned_vector_p
		      (BINFO_TYPE (base_binfo)))

	    /* And now merge the fields of structure.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))

	  /* Just for use if some languages pass arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

ix86_function_arg_boundary (enum machine_mode mode, tree type)
      align = TYPE_ALIGN (type);
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;

      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
	  align = PARM_BOUNDARY;
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
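/* For example, on the 32-bit ABI a plain int argument keeps PARM_BOUNDARY
   (32-bit) alignment, while a 16-byte SSE vector such as __m128 keeps its
   128-bit alignment; an aggregate only gets the 128-bit treatment when
   contains_128bit_aligned_vector_p finds such a vector inside it.  */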
/* Return true if N is a possible register number of function value.  */

ix86_function_value_regno_p (int regno)
	  || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	  || (regno == FIRST_SSE_REG && TARGET_SSE))
	  && (regno == FIRST_MMX_REG && TARGET_MMX))
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
  enum machine_mode natmode = type_natural_mode (valtype);

      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);

      tree fn = NULL_TREE, fntype;
	  && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
/* Return true iff type is returned in memory.  */

ix86_return_in_memory (tree type)
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)

  if (VECTOR_MODE_P (mode) || mode == TImode)
      /* User-created vectors small enough to fit in EAX.  */

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist.  */
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
	return (TARGET_SSE ? 0 : 1);
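/* As a concrete example of the checks above: an 8-byte vector (e.g. __m64)
   is returned in %mm0 only when MMX is enabled, and a 16-byte vector (e.g.
   __m128) in %xmm0 only when SSE is enabled; without the corresponding
   instruction set the value is forced into memory instead.  */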
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
  static bool warnedsse, warnedmmx;

      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	      warning (0, "SSE vector return without SSE enabled "

      if (!TARGET_MMX && !warnedmmx)
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	      warning (0, "MMX vector return without MMX enabled "
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

ix86_libcall_value (enum machine_mode mode)
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	  return gen_rtx_REG (mode, 0);
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
/* Given a mode, return the register to use for a return value.  */

ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we prevent this case when mmx is not available.  */
  if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
    return FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return FIRST_SSE_REG;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;

  return FIRST_FLOAT_REG;
/* Create the va_list data type.  */

ix86_build_builtin_va_list (void)
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
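/* The resulting layout matches the x86-64 psABI va_list: a one-element array
   of a record tagged __va_list_tag whose fields are, in order, gp_offset and
   fp_offset (unsigned counters into the register save area), followed by
   overflow_arg_area and reg_save_area (presumably pointer-typed; their type
   arguments are not visible in the surviving fragment above).  */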
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
    function_arg_advance (&next_cum, mode, type, 1);

    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno;
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
      if (next_cum.sse_regno)
			gen_rtx_CONST (DImode,
				       gen_rtx_PLUS (DImode,
						     GEN_INT (next_cum.sse_regno * 4))));
      emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
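/* Sketch of the register save area built above, under the layout implied by
   the offsets used in this file: the REGPARM_MAX integer registers are stored
   at save_area + 0, 8, ..., 8*REGPARM_MAX - 8, and the SSE registers follow
   at save_area + 8*REGPARM_MAX + 16*n.  The computed jump lets the prologue
   skip the stores for SSE registers that were not actually used by the
   caller, as reported in %al per the x86-64 ABI; the
   "label - 5*eax + sse_regno*5" arithmetic assumes each store emitted by the
   sse_prologue_save template occupies the same number of bytes (evidently 5
   here).  */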
/* Implement va_start.  */

ix86_va_start (tree valist, rtx nextarg)
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
      std_expand_builtin_va_start (valist, nextarg);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		  build_int_cst (NULL_TREE, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_fpr_size)
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		  build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
    t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
		build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
      /* Find the register save area.
	 Prologue of the function saves it right above stack frame.  */
      t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
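/* Worked example of the counters set above: if a function has already
   consumed two integer registers and one SSE register with its named
   arguments, va_start stores gp_offset = 2*8 = 16 and, with REGPARM_MAX of 6
   on x86-64, fp_offset = 8*6 + 1*16 = 64, so the first va_arg fetch starts
   right after the named arguments in the register save area.  */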
/* Implement va_arg.  */

ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree lab_false, lab_over = NULL_TREE;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

      int needed_intregs, needed_sseregs;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)

	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();

      /* First ensure that we fit completely in registers.  */
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);

	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);

      /* Compute index to start of area used for integer regs.  */
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);

	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);

	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
				       size_int (src_offset)));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
					size_int (INTVAL (XEXP (slot, 1)))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);

	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);

	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
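/* The register/overflow split above hinges on the "fit completely in
   registers" test: the value is taken from the register save area only when
   gpr < (REGPARM_MAX - needed_intregs + 1) * 8, i.e. enough 8-byte integer
   slots remain, and similarly for fpr with 16-byte SSE slots; otherwise the
   code branches to lab_false and reads the value from the overflow area,
   bumping ovf by rsize * UNITS_PER_WORD afterwards.  */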
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

ix86_check_movabs (rtx insn, int opnum)
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
/* Initialize the table of extra 80387 mathematical constants.  */

init_ext_80387_constants (void)
  static const char * cst[5] =
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */

  for (i = 0; i < 5; i++)
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);

  ext_80387_constants_init = 1;
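/* The table initialized above backs the standard_80387_constant_* helpers
   below: standard_80387_constant_p reports which entry (if any) a
   CONST_DOUBLE matches, and standard_80387_constant_rtx hands back the
   CONST_DOUBLE for that entry, so e.g. the value of log10(2) can be
   materialized with a single fldlg2 instead of a memory load.  */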
/* Return true if the constant is something that can be loaded with
   a special instruction.  */

standard_80387_constant_p (rtx x)
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))

  if (x == CONST0_RTX (GET_MODE (x)))
  if (x == CONST1_RTX (GET_MODE (x)))

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))

/* Return the opcode of the special instruction to be used to load
standard_80387_constant_opcode (rtx x)
  switch (standard_80387_constant_p (x))

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

standard_80387_constant_rtx (int idx)
  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
standard_sse_constant_p (rtx x)
  if (x == const0_rtx)
  return (x == CONST0_RTX (GET_MODE (x)));
/* Returns 1 if OP contains a symbol reference */

symbolic_reference_mentioned_p (rtx op)
  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

ix86_can_use_return_insn_p (void)
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

ix86_frame_pointer_required (void)
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))

  if (current_function_profile)

/* Record that the current function accesses previous call frames.  */

ix86_setup_frame_addresses (void)
  cfun->machine->accesses_prev_frame = 1;
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
# define USE_HIDDEN_LINKONCE 0

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

get_pc_thunk_name (char name[32], unsigned int regno)
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

ix86_file_end (void)
  for (regno = 0; regno < 8; ++regno)
      if (! ((pic_labels_used >> regno) & 1))

      get_pc_thunk_name (name, regno);

	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);

      if (USE_HIDDEN_LINKONCE)
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);

	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
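/* Each thunk emitted above expands to just two instructions, roughly:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies the return address (the address of the instruction after
   the call) into the chosen register, which output_set_got then adjusts by
   the displacement to the GOT symbol.  */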
/* Emit code for the SET_GOT patterns.  */

output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

	  output_asm_insn ("call\t%a2", xops);

	  /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	     is what will be referenced by the Mach-O PIC subsystem.  */
	  ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

	output_asm_insn ("pop{l}\t%0", xops);

      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
/* Generate a "push" pattern for input ARG.  */

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_PRE_DEC (Pmode,
				       stack_pointer_rtx)),

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

ix86_select_alt_pic_regnum (void)
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])

  return INVALID_REGNUM;
/* Return 1 if we need to save REGNO.  */

ix86_save_reg (unsigned int regno, int maybe_eh_return)
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)

  if (current_function_calls_eh_return && maybe_eh_return)
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));

/* Return number of registers to be saved on the stack.  */

ix86_nsaved_regs (void)
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

ix86_initial_elimination_offset (int from, int to)
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;

      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
/* Fill structure ix86_frame about frame of currently computed function.  */

ix86_compute_frame_layout (struct ix86_frame *frame)
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
    frame->save_regs_using_mov = false;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using those
     features that may break easily.  */
  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  if (ix86_save_varrargs_registers)
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
  if (!current_function_is_leaf || current_function_calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;

  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
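/* Putting the offsets computed above together, the frame looks roughly like
   this (addresses growing downwards from the incoming stack pointer):

	return address
	saved frame pointer  (when frame_pointer_needed)   <- hard_frame_pointer_offset
	saved registers      (frame->nregs words)
	varargs save area    (frame->va_arg_size)
	padding1             (to stack_alignment_needed)   <- frame_pointer_offset
	local frame          (get_frame_size () bytes)
	outgoing arguments   (frame->outgoing_arguments_size)
	padding2             (to preferred_alignment)      <- stack_pointer_offset

   frame->to_allocate is roughly everything below the saved registers, minus
   whatever is allowed to live in the x86-64 red zone.  */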
/* Emit code to save registers in the prologue.  */

ix86_emit_save_regs (void)
  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;

/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET.  */
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive

pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));

      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,

    RTX_FRAME_RELATED_P (insn) = 1;
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

ix86_internal_arg_pointer (void)
  if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);

    return virtual_incoming_args_rtx;

/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
			      SET_DEST (pattern));
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
			 INTVAL (XVECEXP (unspec, 0, 0)));
/* Expand the prologue into a bunch of separate insns.  */

ix86_expand_prologue (void)
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The latter is there in order that the unwinder can see where it
	 should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,

      /* And here we cheat like madmen with the unwind info.  We force the
	 cfa register back to sp+4, which is exactly what it was at the
	 start of the function.  Re-pushing the return address results in
	 the return at the same spot relative to the cfa, and thus is
	 correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);

      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      gcc_assert (!TARGET_64BIT);

	  emit_insn (gen_push (eax));

      emit_move_insn (eax, GEN_INT (allocate));

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					    t, REG_NOTES (insn));

	  if (frame_pointer_needed)
	    t = plus_constant (hard_frame_pointer_rtx,
			       - frame.nregs * UNITS_PER_WORD);
	    t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;

	insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
/* Restore function stack, frame, and registers.  */

ix86_expand_epilogue (int style)
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,

	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));

      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
	  gcc_assert (frame_pointer_needed);
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));

      if (frame_pointer_needed)
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));

  if (cfun->machine->force_align_arg_pointer)
      emit_insn (gen_addsi3 (stack_pointer_rtx,
			     cfun->machine->force_align_arg_pointer,

  /* Sibcall epilogues don't want a return instruction.  */

  if (current_function_pops_args && current_function_args_size)
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
      if (current_function_pops_args >= 65536)
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	emit_jump_insn (gen_return_pop_internal (popc));
    emit_jump_insn (gen_return_internal ());
/* Reset from the function's potential modifications.  */

ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5553 /* Extract the parts of an RTL expression that is a valid memory address
5554 for an instruction. Return 0 if the structure of the address is
5555 grossly off. Return -1 if the address contains ASHIFT, so it is not
5556 strictly valid, but still used for computing length of lea instruction. */
5559 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5561 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
5562 rtx base_reg
, index_reg
;
5563 HOST_WIDE_INT scale
= 1;
5564 rtx scale_rtx
= NULL_RTX
;
5566 enum ix86_address_seg seg
= SEG_DEFAULT
;
5568 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5570 else if (GET_CODE (addr
) == PLUS
)
5580 addends
[n
++] = XEXP (op
, 1);
5583 while (GET_CODE (op
) == PLUS
);
5588 for (i
= n
; i
>= 0; --i
)
5591 switch (GET_CODE (op
))
5596 index
= XEXP (op
, 0);
5597 scale_rtx
= XEXP (op
, 1);
5601 if (XINT (op
, 1) == UNSPEC_TP
5602 && TARGET_TLS_DIRECT_SEG_REFS
5603 && seg
== SEG_DEFAULT
)
5604 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5633 else if (GET_CODE (addr
) == MULT
)
5635 index
= XEXP (addr
, 0); /* index*scale */
5636 scale_rtx
= XEXP (addr
, 1);
5638 else if (GET_CODE (addr
) == ASHIFT
)
5642 /* We're called for lea too, which implements ashift on occasion. */
5643 index
= XEXP (addr
, 0);
5644 tmp
= XEXP (addr
, 1);
5645 if (GET_CODE (tmp
) != CONST_INT
)
5647 scale
= INTVAL (tmp
);
5648 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5654 disp
= addr
; /* displacement */
5656 /* Extract the integral value of scale. */
5659 if (GET_CODE (scale_rtx
) != CONST_INT
)
5661 scale
= INTVAL (scale_rtx
);
5664 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
5665 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
5667 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5668 if (base_reg
&& index_reg
&& scale
== 1
5669 && (index_reg
== arg_pointer_rtx
5670 || index_reg
== frame_pointer_rtx
5671 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
5674 tmp
= base
, base
= index
, index
= tmp
;
5675 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
5678 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5679 if ((base_reg == hard_frame_pointer_rtx
5680 || base_reg == frame_pointer_rtx
5681 || base_reg == arg_pointer_rtx) && !disp)
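/* Background note: in the ModRM/SIB encoding, a mod=00 base of EBP selects
   "disp32, no base" instead, so a plain [%ebp] has to be emitted as
   [%ebp+0] with an explicit (zero) displacement.  */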
5684 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5685 Avoid this by transforming to [%esi+0]. */
5686 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5687 && base_reg && !index_reg && !disp
5689 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5692 /* Special case: encode reg+reg instead of reg*2. */
5693 if (!base && index && scale && scale == 2)
5694 base = index, base_reg = index_reg, scale = 1;
5696 /* Special case: scaling cannot be encoded without base or displacement. */
5697 if (!base && !disp && index && scale != 1)
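/* Encoding note (illustrative): a scaled index with no base and no
   displacement still requires a SIB byte plus a 32-bit displacement field,
   e.g. [%eax*2] is really [%eax*2+0x00000000], whereas [%eax+%eax] needs
   neither, so the reg+reg form handled above is strictly shorter.  */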
5709 /* Return cost of the memory address x.
5710 For i386, it is better to use a complex address than let gcc copy
5711 the address into a reg and make a new pseudo. But not if the address
5712 requires two regs - that would mean more pseudos with longer lifetimes. */
5715 ix86_address_cost (rtx x)
5717 struct ix86_address parts;
5719 int ok = ix86_decompose_address (x, &parts);
5723 if (parts.base && GET_CODE (parts.base) == SUBREG)
5724 parts.base = SUBREG_REG (parts.base);
5725 if (parts.index && GET_CODE (parts.index) == SUBREG)
5726 parts.index = SUBREG_REG (parts.index);
5728 /* More complex memory references are better. */
5729 if (parts.disp && parts.disp != const0_rtx)
5731 if (parts.seg != SEG_DEFAULT)
5734 /* Attempt to minimize number of registers in the address. */
5736 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5738 && (!REG_P (parts.index)
5739 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5743 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5745 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5746 && parts.base != parts.index)
5749 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5750 since its predecode logic can't detect the length of such instructions
5751 and they degenerate to vector decoding. Increase the cost of such
5752 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5753 to split such addresses or even refuse such addresses at all.
5755 The following addressing modes are affected:
5760 The first and last case may be avoidable by explicitly coding the zero in
5761 the memory address, but I don't have an AMD-K6 machine handy to check this. */
5765 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5766 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5767 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5773 /* If X is a machine specific address (i.e. a symbol or label being
5774 referenced as a displacement from the GOT implemented using an
5775 UNSPEC), then return the base term. Otherwise return X. */
5778 ix86_find_base_term (rtx x)
5784 if (GET_CODE (x) != CONST)
5787 if (GET_CODE (term) == PLUS
5788 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5789 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5790 term = XEXP (term, 0);
5791 if (GET_CODE (term) != UNSPEC
5792 || XINT (term, 1) != UNSPEC_GOTPCREL)
5795 term = XVECEXP (term, 0, 0);
5797 if (GET_CODE (term) != SYMBOL_REF
5798 && GET_CODE (term) != LABEL_REF)
5804 term = ix86_delegitimize_address (x);
5806 if (GET_CODE (term) != SYMBOL_REF
5807 && GET_CODE (term) != LABEL_REF)
5813 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5814 this is used to form addresses to local data when -fPIC is in effect. */
5818 darwin_local_data_pic (rtx disp)
5820 if (GET_CODE (disp) == MINUS)
5822 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5823 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5824 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5826 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5827 if (! strcmp (sym_name, "<pic base>"))
5835 /* Determine if a given RTX is a valid constant. We already know this
5836 satisfies CONSTANT_P. */
5839 legitimate_constant_p (rtx x)
5841 switch (GET_CODE (x))
5846 if (GET_CODE (x) == PLUS)
5848 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5853 if (TARGET_MACHO && darwin_local_data_pic (x))
5856 /* Only some unspecs are valid as "constants". */
5857 if (GET_CODE (x) == UNSPEC)
5858 switch (XINT (x, 1))
5861 return TARGET_64BIT;
5864 x = XVECEXP (x, 0, 0);
5865 return (GET_CODE (x) == SYMBOL_REF
5866 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5868 x = XVECEXP (x, 0, 0);
5869 return (GET_CODE (x) == SYMBOL_REF
5870 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5875 /* We must have drilled down to a symbol. */
5876 if (GET_CODE (x) == LABEL_REF)
5878 if (GET_CODE (x) != SYMBOL_REF)
5883 /* TLS symbols are never valid. */
5884 if (SYMBOL_REF_TLS_MODEL (x))
5892 /* Otherwise we handle everything else in the move patterns. */
5896 /* Determine if it's legal to put X into the constant pool. This
5897 is not possible for the address of thread-local symbols, which
5898 is checked above. */
5901 ix86_cannot_force_const_mem (rtx x)
5903 return !legitimate_constant_p (x);
5906 /* Determine if a given RTX is a valid constant address. */
5909 constant_address_p (rtx x)
5911 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5914 /* Nonzero if the constant value X is a legitimate general operand
5915 when generating PIC code. It is given that flag_pic is on and
5916 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5919 legitimate_pic_operand_p (rtx x)
5923 switch (GET_CODE (x))
5926 inner = XEXP (x, 0);
5927 if (GET_CODE (inner) == PLUS
5928 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5929 inner = XEXP (inner, 0);
5931 /* Only some unspecs are valid as "constants". */
5932 if (GET_CODE (inner) == UNSPEC)
5933 switch (XINT (inner, 1))
5936 return TARGET_64BIT;
5938 x = XVECEXP (inner, 0, 0);
5939 return (GET_CODE (x) == SYMBOL_REF
5940 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5948 return legitimate_pic_address_disp_p (x);
5955 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
5959 legitimate_pic_address_disp_p (rtx disp)
5963 /* In 64bit mode we can allow direct addresses of symbols and labels
5964 when they are not dynamic symbols. */
5967 rtx op0 = disp, op1;
5969 switch (GET_CODE (disp))
5975 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5977 op0 = XEXP (XEXP (disp, 0), 0);
5978 op1 = XEXP (XEXP (disp, 0), 1);
5979 if (GET_CODE (op1) != CONST_INT
5980 || INTVAL (op1) >= 16*1024*1024
5981 || INTVAL (op1) < -16*1024*1024)
5983 if (GET_CODE (op0) == LABEL_REF)
5985 if (GET_CODE (op0) != SYMBOL_REF)
5990 /* TLS references should always be enclosed in UNSPEC. */
5991 if (SYMBOL_REF_TLS_MODEL (op0))
5993 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6001 if (GET_CODE (disp) != CONST)
6003 disp = XEXP (disp, 0);
6007 /* It is not safe to allow PLUS expressions here; this limits the allowed
6008 distance of GOT table references. We should not need these anyway. */
6009 if (GET_CODE (disp) != UNSPEC
6010 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6011 && XINT (disp, 1) != UNSPEC_GOTOFF))
6014 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6015 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6021 if (GET_CODE (disp) == PLUS)
6023 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6025 disp = XEXP (disp, 0);
6029 if (TARGET_MACHO && darwin_local_data_pic (disp))
6032 if (GET_CODE (disp) != UNSPEC)
6035 switch (XINT (disp, 1))
6040 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6042 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6043 While the ABI also specifies a 32bit relocation, we don't produce it in the
6044 small PIC model at all. */
6045 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6046 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6048 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6050 case UNSPEC_GOTTPOFF:
6051 case UNSPEC_GOTNTPOFF:
6052 case UNSPEC_INDNTPOFF:
6055 disp = XVECEXP (disp, 0, 0);
6056 return (GET_CODE (disp) == SYMBOL_REF
6057 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6059 disp = XVECEXP (disp, 0, 0);
6060 return (GET_CODE (disp) == SYMBOL_REF
6061 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6063 disp = XVECEXP (disp, 0, 0);
6064 return (GET_CODE (disp) == SYMBOL_REF
6065 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6071 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6072 memory address for an instruction. The MODE argument is the machine mode
6073 for the MEM expression that wants to use this address.
6075 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6076 convert common non-canonical forms to canonical form so that they will be recognized. */
6080 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6082 struct ix86_address parts;
6083 rtx base, index, disp;
6084 HOST_WIDE_INT scale;
6085 const char *reason = NULL;
6086 rtx reason_rtx = NULL_RTX;
6088 if (TARGET_DEBUG_ADDR)
6091 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6092 GET_MODE_NAME (mode), strict);
6096 if (ix86_decompose_address (addr
, &parts
) <= 0)
6098 reason
= "decomposition failed";
6103 index
= parts
.index
;
6105 scale
= parts
.scale
;
6107 /* Validate base register.
6109 Don't allow SUBREG's that span more than a word here. It can lead to spill
6110 failures when the base is one word out of a two word structure, which is
6111 represented internally as a DImode int. */
6120 else if (GET_CODE (base
) == SUBREG
6121 && REG_P (SUBREG_REG (base
))
6122 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6124 reg
= SUBREG_REG (base
);
6127 reason
= "base is not a register";
6131 if (GET_MODE (base
) != Pmode
)
6133 reason
= "base is not in Pmode";
6137 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6138 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6140 reason
= "base is not valid";
6145 /* Validate index register.
6147 Don't allow SUBREG's that span more than a word here -- same as above. */
6156 else if (GET_CODE (index
) == SUBREG
6157 && REG_P (SUBREG_REG (index
))
6158 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6160 reg
= SUBREG_REG (index
);
6163 reason
= "index is not a register";
6167 if (GET_MODE (index
) != Pmode
)
6169 reason
= "index is not in Pmode";
6173 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6174 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6176 reason
= "index is not valid";
6181 /* Validate scale factor. */
6184 reason_rtx
= GEN_INT (scale
);
6187 reason
= "scale without index";
6191 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6193 reason
= "scale is not a valid multiplier";
6198 /* Validate displacement. */
6203 if (GET_CODE (disp
) == CONST
6204 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6205 switch (XINT (XEXP (disp
, 0), 1))
6207 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6208 used. While the ABI also specifies 32bit relocations, we don't produce
6209 them at all and use IP relative addressing instead. */
6212 gcc_assert (flag_pic
);
6214 goto is_legitimate_pic
;
6215 reason
= "64bit address unspec";
6218 case UNSPEC_GOTPCREL
:
6219 gcc_assert (flag_pic
);
6220 goto is_legitimate_pic
;
6222 case UNSPEC_GOTTPOFF
:
6223 case UNSPEC_GOTNTPOFF
:
6224 case UNSPEC_INDNTPOFF
:
6230 reason
= "invalid address unspec";
6234 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6236 && !machopic_operand_p (disp
)
6241 if (TARGET_64BIT
&& (index
|| base
))
6243 /* foo@dtpoff(%rX) is ok. */
6244 if (GET_CODE (disp
) != CONST
6245 || GET_CODE (XEXP (disp
, 0)) != PLUS
6246 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6247 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6248 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6249 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6251 reason
= "non-constant pic memory reference";
6255 else if (! legitimate_pic_address_disp_p (disp
))
6257 reason
= "displacement is an invalid pic construct";
6261 /* This code used to verify that a symbolic pic displacement
6262 includes the pic_offset_table_rtx register.
6264 While this is a good idea, unfortunately these constructs may
6265 be created by the "adds using lea" optimization for incorrect code like:
6274 This code is nonsensical, but results in addressing the
6275 GOT table with a pic_offset_table_rtx base. We can't
6276 just refuse it easily, since it gets matched by the
6277 "addsi3" pattern, which later gets split to lea when the
6278 output register differs from the input. While this
6279 could be handled by a separate addsi pattern for this case
6280 that never results in lea, disabling this test seems to be
6281 the easier and correct fix for the crash. */
6283 else if (GET_CODE (disp
) != LABEL_REF
6284 && GET_CODE (disp
) != CONST_INT
6285 && (GET_CODE (disp
) != CONST
6286 || !legitimate_constant_p (disp
))
6287 && (GET_CODE (disp
) != SYMBOL_REF
6288 || !legitimate_constant_p (disp
)))
6290 reason
= "displacement is not constant";
6293 else if (TARGET_64BIT
6294 && !x86_64_immediate_operand (disp
, VOIDmode
))
6296 reason
= "displacement is out of range";
6301 /* Everything looks valid. */
6302 if (TARGET_DEBUG_ADDR
)
6303 fprintf (stderr
, "Success.\n");
6307 if (TARGET_DEBUG_ADDR
)
6309 fprintf (stderr
, "Error: %s\n", reason
);
6310 debug_rtx (reason_rtx
);
6315 /* Return a unique alias set for the GOT. */
6317 static HOST_WIDE_INT
6318 ix86_GOT_alias_set (void)
6320 static HOST_WIDE_INT set
= -1;
6322 set
= new_alias_set ();
6326 /* Return a legitimate reference for ORIG (an address) using the
6327 register REG. If REG is 0, a new pseudo is generated.
6329 There are two types of references that must be handled:
6331 1. Global data references must load the address from the GOT, via
6332 the PIC reg. An insn is emitted to do this load, and the reg is
6335 2. Static data references, constant pool addresses, and code labels
6336 compute the address as an offset from the GOT, whose base is in
6337 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6338 differentiate them from global data objects. The returned
6339 address is the PIC reg + an unspec constant.
6341 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6342 reg also appears in the address. */
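/* Illustrative sketch of the two 32-bit cases described above (assuming
   %ebx holds the PIC register):

     global symbol:   movl  foo@GOT(%ebx), %reg     -- load &foo from the GOT
     local symbol:    leal  foo@GOTOFF(%ebx), %reg  -- PIC reg + constant offset
*/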
6345 legitimize_pic_address (rtx orig, rtx reg)
6353 reg = gen_reg_rtx (Pmode);
6354 /* Use the generic Mach-O PIC machinery. */
6355 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6358 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6360 else if (TARGET_64BIT
6361 && ix86_cmodel
!= CM_SMALL_PIC
6362 && local_symbolic_operand (addr
, Pmode
))
6365 /* This symbol may be referenced via a displacement from the PIC
6366 base address (@GOTOFF). */
6368 if (reload_in_progress
)
6369 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6370 if (GET_CODE (addr
) == CONST
)
6371 addr
= XEXP (addr
, 0);
6372 if (GET_CODE (addr
) == PLUS
)
6374 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6375 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6378 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6379 new = gen_rtx_CONST (Pmode
, new);
6381 tmpreg
= gen_reg_rtx (Pmode
);
6384 emit_move_insn (tmpreg
, new);
6388 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6389 tmpreg
, 1, OPTAB_DIRECT
);
6392 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6394 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6396 /* This symbol may be referenced via a displacement from the PIC
6397 base address (@GOTOFF). */
6399 if (reload_in_progress
)
6400 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6401 if (GET_CODE (addr
) == CONST
)
6402 addr
= XEXP (addr
, 0);
6403 if (GET_CODE (addr
) == PLUS
)
6405 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6406 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6409 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6410 new = gen_rtx_CONST (Pmode
, new);
6411 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6415 emit_move_insn (reg
, new);
6419 else if (GET_CODE (addr
) == SYMBOL_REF
)
6423 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6424 new = gen_rtx_CONST (Pmode
, new);
6425 new = gen_const_mem (Pmode
, new);
6426 set_mem_alias_set (new, ix86_GOT_alias_set ());
6429 reg
= gen_reg_rtx (Pmode
);
6430 /* Use gen_movsi directly, otherwise the address is loaded
6431 into a register for CSE. We don't want to CSE these addresses;
6432 instead we CSE addresses from the GOT table, so skip this. */
6433 emit_insn (gen_movsi (reg
, new));
6438 /* This symbol must be referenced via a load from the
6439 Global Offset Table (@GOT). */
6441 if (reload_in_progress
)
6442 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6443 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6444 new = gen_rtx_CONST (Pmode
, new);
6445 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6446 new = gen_const_mem (Pmode
, new);
6447 set_mem_alias_set (new, ix86_GOT_alias_set ());
6450 reg
= gen_reg_rtx (Pmode
);
6451 emit_move_insn (reg
, new);
6457 if (GET_CODE (addr
) == CONST_INT
6458 && !x86_64_immediate_operand (addr
, VOIDmode
))
6462 emit_move_insn (reg
, addr
);
6466 new = force_reg (Pmode
, addr
);
6468 else if (GET_CODE (addr
) == CONST
)
6470 addr
= XEXP (addr
, 0);
6472 /* We must match stuff we generate before. Assume the only
6473 unspecs that can get here are ours. Not that we could do
6474 anything with them anyway.... */
6475 if (GET_CODE (addr
) == UNSPEC
6476 || (GET_CODE (addr
) == PLUS
6477 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6479 gcc_assert (GET_CODE (addr
) == PLUS
);
6481 if (GET_CODE (addr
) == PLUS
)
6483 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6485 /* Check first to see if this is a constant offset from a @GOTOFF
6486 symbol reference. */
6487 if (local_symbolic_operand (op0
, Pmode
)
6488 && GET_CODE (op1
) == CONST_INT
)
6492 if (reload_in_progress
)
6493 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6494 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6496 new = gen_rtx_PLUS (Pmode
, new, op1
);
6497 new = gen_rtx_CONST (Pmode
, new);
6498 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6502 emit_move_insn (reg
, new);
6508 if (INTVAL (op1
) < -16*1024*1024
6509 || INTVAL (op1
) >= 16*1024*1024)
6511 if (!x86_64_immediate_operand (op1
, Pmode
))
6512 op1
= force_reg (Pmode
, op1
);
6513 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6519 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6520 new = legitimize_pic_address (XEXP (addr
, 1),
6521 base
== reg
? NULL_RTX
: reg
);
6523 if (GET_CODE (new) == CONST_INT
)
6524 new = plus_constant (base
, INTVAL (new));
6527 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6529 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6530 new = XEXP (new, 1);
6532 new = gen_rtx_PLUS (Pmode
, base
, new);
6540 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6543 get_thread_pointer (int to_reg)
6547 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6551 reg = gen_reg_rtx (Pmode);
6552 insn = gen_rtx_SET (VOIDmode, reg, tp);
6553 insn = emit_insn (insn);
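/* Background note: on ia32/x86-64 the TLS thread pointer is the base of the
   %gs (32-bit) or %fs (64-bit) segment, matching the SEG_GS/SEG_FS choice
   made in ix86_decompose_address for UNSPEC_TP addresses.  */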
6558 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6559 false if we expect this to be used for a memory address and true if
6560 we expect to load the address into a register. */
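/* Background on the four TLS access models handled below: global-dynamic
   calls __tls_get_addr for each symbol; local-dynamic makes one base call
   and then adds link-time @DTPOFF offsets; initial-exec loads the
   thread-pointer-relative offset from the GOT; local-exec uses an offset
   known at link time.  */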
6563 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6565 rtx dest, base, off, pic, tp;
6570 case TLS_MODEL_GLOBAL_DYNAMIC
:
6571 dest
= gen_reg_rtx (Pmode
);
6572 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6574 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6576 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6579 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6580 insns
= get_insns ();
6583 emit_libcall_block (insns
, dest
, rax
, x
);
6585 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6586 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
6588 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6590 if (TARGET_GNU2_TLS
)
6592 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
6594 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6598 case TLS_MODEL_LOCAL_DYNAMIC
:
6599 base
= gen_reg_rtx (Pmode
);
6600 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6602 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6604 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6607 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6608 insns
= get_insns ();
6611 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6612 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6613 emit_libcall_block (insns
, base
, rax
, note
);
6615 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6616 emit_insn (gen_tls_local_dynamic_base_64 (base
));
6618 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6620 if (TARGET_GNU2_TLS
)
6622 rtx x
= ix86_tls_module_base ();
6624 base
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, base
));
6626 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6629 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6630 off
= gen_rtx_CONST (Pmode
, off
);
6632 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
6635 case TLS_MODEL_INITIAL_EXEC
:
6639 type
= UNSPEC_GOTNTPOFF
;
6643 if (reload_in_progress
)
6644 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6645 pic
= pic_offset_table_rtx
;
6646 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6648 else if (!TARGET_ANY_GNU_TLS
)
6650 pic
= gen_reg_rtx (Pmode
);
6651 emit_insn (gen_set_got (pic
));
6652 type
= UNSPEC_GOTTPOFF
;
6657 type
= UNSPEC_INDNTPOFF
;
6660 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6661 off
= gen_rtx_CONST (Pmode
, off
);
6663 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6664 off
= gen_const_mem (Pmode
, off
);
6665 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6667 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6669 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6670 off
= force_reg (Pmode
, off
);
6671 return gen_rtx_PLUS (Pmode
, base
, off
);
6675 base
= get_thread_pointer (true);
6676 dest
= gen_reg_rtx (Pmode
);
6677 emit_insn (gen_subsi3 (dest
, base
, off
));
6681 case TLS_MODEL_LOCAL_EXEC
:
6682 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6683 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6684 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6685 off
= gen_rtx_CONST (Pmode
, off
);
6687 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6689 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6690 return gen_rtx_PLUS (Pmode
, base
, off
);
6694 base
= get_thread_pointer (true);
6695 dest
= gen_reg_rtx (Pmode
);
6696 emit_insn (gen_subsi3 (dest
, base
, off
));
6707 /* Try machine-dependent ways of modifying an illegitimate address
6708 to be legitimate. If we find one, return the new, valid address.
6709 This macro is used in only one place: `memory_address' in explow.c.
6711 OLDX is the address as it was before break_out_memory_refs was called.
6712 In some cases it is useful to look at this to decide what needs to be done.
6714 MODE and WIN are passed so that this macro can use
6715 GO_IF_LEGITIMATE_ADDRESS.
6717 It is always safe for this macro to do nothing. It exists to recognize
6718 opportunities to optimize the output.
6720 For the 80386, we handle X+REG by loading X into a register R and
6721 using R+REG. R will go in a general reg and indexing will be used.
6722 However, if REG is a broken-out memory address or multiplication,
6723 nothing needs to be done because REG can certainly go in a general reg.
6725 When -fpic is used, special handling is needed for symbolic references.
6726 See comments by legitimize_pic_address in i386.c for details. */
6729 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6734 if (TARGET_DEBUG_ADDR
)
6736 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6737 GET_MODE_NAME (mode
));
6741 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
6743 return legitimize_tls_address (x
, log
, false);
6744 if (GET_CODE (x
) == CONST
6745 && GET_CODE (XEXP (x
, 0)) == PLUS
6746 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
6747 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
6749 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
6750 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
6753 if (flag_pic
&& SYMBOLIC_CONST (x
))
6754 return legitimize_pic_address (x
, 0);
6756 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6757 if (GET_CODE (x
) == ASHIFT
6758 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6759 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
6762 log
= INTVAL (XEXP (x
, 1));
6763 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6764 GEN_INT (1 << log
));
6767 if (GET_CODE (x
) == PLUS
)
6769 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6771 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6772 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6773 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
6776 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
6777 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6778 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6779 GEN_INT (1 << log
));
6782 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6783 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6784 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
6787 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
6788 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6789 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6790 GEN_INT (1 << log
));
6793 /* Put multiply first if it isn't already. */
6794 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6796 rtx tmp
= XEXP (x
, 0);
6797 XEXP (x
, 0) = XEXP (x
, 1);
6802 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6803 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6804 created by virtual register instantiation, register elimination, and
6805 similar optimizations. */
6806 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6809 x
= gen_rtx_PLUS (Pmode
,
6810 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6811 XEXP (XEXP (x
, 1), 0)),
6812 XEXP (XEXP (x
, 1), 1));
6816 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6817 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6818 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6819 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6820 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6821 && CONSTANT_P (XEXP (x
, 1)))
6824 rtx other
= NULL_RTX
;
6826 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6828 constant
= XEXP (x
, 1);
6829 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6831 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6833 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6834 other
= XEXP (x
, 1);
6842 x
= gen_rtx_PLUS (Pmode
,
6843 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6844 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6845 plus_constant (other
, INTVAL (constant
)));
6849 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6852 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6855 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6858 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6861 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6865 && GET_CODE (XEXP (x
, 1)) == REG
6866 && GET_CODE (XEXP (x
, 0)) == REG
)
6869 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6872 x
= legitimize_pic_address (x
, 0);
6875 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6878 if (GET_CODE (XEXP (x
, 0)) == REG
)
6880 rtx temp
= gen_reg_rtx (Pmode
);
6881 rtx val
= force_operand (XEXP (x
, 1), temp
);
6883 emit_move_insn (temp
, val
);
6889 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6891 rtx temp
= gen_reg_rtx (Pmode
);
6892 rtx val
= force_operand (XEXP (x
, 0), temp
);
6894 emit_move_insn (temp
, val
);
6904 /* Print an integer constant expression in assembler syntax. Addition
6905 and subtraction are the only arithmetic that may appear in these
6906 expressions. FILE is the stdio stream to write to, X is the rtx, and
6907 CODE is the operand print code from the output string. */
6910 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6914 switch (GET_CODE (x
))
6917 gcc_assert (flag_pic
);
6922 output_addr_const (file
, x
);
6923 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6924 fputs ("@PLT", file
);
6931 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6932 assemble_name (asm_out_file
, buf
);
6936 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6940 /* This used to output parentheses around the expression,
6941 but that does not work on the 386 (either ATT or BSD assembler). */
6942 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6946 if (GET_MODE (x
) == VOIDmode
)
6948 /* We can use %d if the number is <32 bits and positive. */
6949 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6950 fprintf (file
, "0x%lx%08lx",
6951 (unsigned long) CONST_DOUBLE_HIGH (x
),
6952 (unsigned long) CONST_DOUBLE_LOW (x
));
6954 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6957 /* We can't handle floating point constants;
6958 PRINT_OPERAND must handle them. */
6959 output_operand_lossage ("floating constant misused");
6963 /* Some assemblers need integer constants to appear first. */
6964 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6966 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6968 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6972 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
6973 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6975 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6981 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6982 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6984 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6986 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6990 gcc_assert (XVECLEN (x
, 0) == 1);
6991 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6992 switch (XINT (x
, 1))
6995 fputs ("@GOT", file
);
6998 fputs ("@GOTOFF", file
);
7000 case UNSPEC_GOTPCREL
:
7001 fputs ("@GOTPCREL(%rip)", file
);
7003 case UNSPEC_GOTTPOFF
:
7004 /* FIXME: This might be @TPOFF in Sun ld too. */
7005 fputs ("@GOTTPOFF", file
);
7008 fputs ("@TPOFF", file
);
7012 fputs ("@TPOFF", file
);
7014 fputs ("@NTPOFF", file
);
7017 fputs ("@DTPOFF", file
);
7019 case UNSPEC_GOTNTPOFF
:
7021 fputs ("@GOTTPOFF(%rip)", file
);
7023 fputs ("@GOTNTPOFF", file
);
7025 case UNSPEC_INDNTPOFF
:
7026 fputs ("@INDNTPOFF", file
);
7029 output_operand_lossage ("invalid UNSPEC as operand");
7035 output_operand_lossage ("invalid expression as operand");
7039 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7040 We need to emit DTP-relative relocations. */
7043 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7045 fputs (ASM_LONG
, file
);
7046 output_addr_const (file
, x
);
7047 fputs ("@DTPOFF", file
);
7053 fputs (", 0", file
);
7060 /* In the name of slightly smaller debug output, and to cater to
7061 general assembler lossage, recognize PIC+GOTOFF and turn it back
7062 into a direct symbol reference. */
7065 ix86_delegitimize_address (rtx orig_x
)
7069 if (GET_CODE (x
) == MEM
)
7074 if (GET_CODE (x
) != CONST
7075 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7076 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7077 || GET_CODE (orig_x
) != MEM
)
7079 return XVECEXP (XEXP (x
, 0), 0, 0);
7082 if (GET_CODE (x
) != PLUS
7083 || GET_CODE (XEXP (x
, 1)) != CONST
)
7086 if (GET_CODE (XEXP (x
, 0)) == REG
7087 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7088 /* %ebx + GOT/GOTOFF */
7090 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7092 /* %ebx + %reg * scale + GOT/GOTOFF */
7094 if (GET_CODE (XEXP (y
, 0)) == REG
7095 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7097 else if (GET_CODE (XEXP (y
, 1)) == REG
7098 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7102 if (GET_CODE (y
) != REG
7103 && GET_CODE (y
) != MULT
7104 && GET_CODE (y
) != ASHIFT
)
7110 x
= XEXP (XEXP (x
, 1), 0);
7111 if (GET_CODE (x
) == UNSPEC
7112 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7113 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7116 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
7117 return XVECEXP (x
, 0, 0);
7120 if (GET_CODE (x
) == PLUS
7121 && GET_CODE (XEXP (x
, 0)) == UNSPEC
7122 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7123 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7124 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
7125 && GET_CODE (orig_x
) != MEM
)))
7127 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
7129 return gen_rtx_PLUS (Pmode
, y
, x
);
7137 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7142 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7144 enum rtx_code second_code
, bypass_code
;
7145 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7146 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7147 code
= ix86_fp_compare_code_to_integer (code
);
7151 code
= reverse_condition (code
);
7162 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7166 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7167 Those same assemblers have the same but opposite lossage on cmov. */
7168 gcc_assert (mode
== CCmode
);
7169 suffix
= fp
? "nbe" : "a";
7189 gcc_assert (mode
== CCmode
);
7211 gcc_assert (mode
== CCmode
);
7212 suffix
= fp
? "nb" : "ae";
7215 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7219 gcc_assert (mode
== CCmode
);
7223 suffix
= fp
? "u" : "p";
7226 suffix
= fp
? "nu" : "np";
7231 fputs (suffix
, file
);
7234 /* Print the name of register X to FILE based on its machine mode and number.
7235 If CODE is 'w', pretend the mode is HImode.
7236 If CODE is 'b', pretend the mode is QImode.
7237 If CODE is 'k', pretend the mode is SImode.
7238 If CODE is 'q', pretend the mode is DImode.
7239 If CODE is 'h', pretend the reg is the 'high' byte register.
7240 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7243 print_reg (rtx x
, int code
, FILE *file
)
7245 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7246 && REGNO (x
) != FRAME_POINTER_REGNUM
7247 && REGNO (x
) != FLAGS_REG
7248 && REGNO (x
) != FPSR_REG
);
7250 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7253 if (code
== 'w' || MMX_REG_P (x
))
7255 else if (code
== 'b')
7257 else if (code
== 'k')
7259 else if (code
== 'q')
7261 else if (code
== 'y')
7263 else if (code
== 'h')
7266 code
= GET_MODE_SIZE (GET_MODE (x
));
7268 /* Irritatingly, AMD extended registers use a different naming convention
7269 from the normal registers. */
7270 if (REX_INT_REG_P (x
))
7272 gcc_assert (TARGET_64BIT
);
7276 error ("extended registers have no high halves");
7279 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7282 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7285 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7288 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7291 error ("unsupported operand size for extended register");
7299 if (STACK_TOP_P (x
))
7301 fputs ("st(0)", file
);
7308 if (! ANY_FP_REG_P (x
))
7309 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7314 fputs (hi_reg_name
[REGNO (x
)], file
);
7317 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7319 fputs (qi_reg_name
[REGNO (x
)], file
);
7322 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7324 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7331 /* Locate some local-dynamic symbol still in use by this function
7332 so that we can print its name in some tls_local_dynamic_base
7336 get_some_local_dynamic_name (void)
7340 if (cfun
->machine
->some_ld_name
)
7341 return cfun
->machine
->some_ld_name
;
7343 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7345 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7346 return cfun
->machine
->some_ld_name
;
7352 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7356 if (GET_CODE (x
) == SYMBOL_REF
7357 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7359 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7367 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7368 C -- print opcode suffix for set/cmov insn.
7369 c -- like C, but print reversed condition
7370 F,f -- likewise, but for floating-point.
7371 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7373 R -- print the prefix for register names.
7374 z -- print the opcode suffix for the size of the current operand.
7375 * -- print a star (in certain assembler syntax)
7376 A -- print an absolute memory reference.
7377 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7378 s -- print a shift double count, followed by the assemblers argument
7380 b -- print the QImode name of the register for the indicated operand.
7381 %b0 would print %al if operands[0] is reg 0.
7382 w -- likewise, print the HImode name of the register.
7383 k -- likewise, print the SImode name of the register.
7384 q -- likewise, print the DImode name of the register.
7385 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7386 y -- print "st(0)" instead of "st" as a register.
7387 D -- print condition for SSE cmp instruction.
7388 P -- if PIC, print an @PLT suffix.
7389 X -- don't print any sort of PIC '@' suffix for a symbol.
7390 & -- print some in-use local-dynamic symbol name.
7391 H -- print a memory address offset by 8; used for sse high-parts
7395 print_operand (FILE *file
, rtx x
, int code
)
7402 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7407 assemble_name (file
, get_some_local_dynamic_name ());
7411 switch (ASSEMBLER_DIALECT
)
7418 /* Intel syntax. For absolute addresses, registers should not
7419 be surrounded by braces. */
7420 if (GET_CODE (x
) != REG
)
7423 PRINT_OPERAND (file
, x
, 0);
7433 PRINT_OPERAND (file
, x
, 0);
7438 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7443 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7448 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7453 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7458 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7463 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7468 /* 387 opcodes don't get size suffixes if the operands are
7470 if (STACK_REG_P (x
))
7473 /* Likewise if using Intel opcodes. */
7474 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7477 /* This is the size of op from size of operand. */
7478 switch (GET_MODE_SIZE (GET_MODE (x
)))
7481 #ifdef HAVE_GAS_FILDS_FISTS
7487 if (GET_MODE (x
) == SFmode
)
7502 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7504 #ifdef GAS_MNEMONICS
7530 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7532 PRINT_OPERAND (file
, x
, 0);
7538 /* Little bit of braindamage here. The SSE compare instructions
7539 use completely different names for the comparisons than the
7540 fp conditional moves do. */
7541 switch (GET_CODE (x
))
7556 fputs ("unord", file
);
7560 fputs ("neq", file
);
7564 fputs ("nlt", file
);
7568 fputs ("nle", file
);
7571 fputs ("ord", file
);
7578 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7579 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7581 switch (GET_MODE (x
))
7583 case HImode
: putc ('w', file
); break;
7585 case SFmode
: putc ('l', file
); break;
7587 case DFmode
: putc ('q', file
); break;
7588 default: gcc_unreachable ();
7595 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7598 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7599 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7602 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7605 /* Like above, but reverse condition */
7607 /* Check to see if argument to %c is really a constant
7608 and not a condition code which needs to be reversed. */
7609 if (!COMPARISON_P (x
))
7611 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7614 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7617 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7618 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7621 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7625 /* It doesn't actually matter what mode we use here, as we're
7626 only going to use this for printing. */
7627 x
= adjust_address_nv (x
, DImode
, 8);
7634 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7637 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7640 int pred_val
= INTVAL (XEXP (x
, 0));
7642 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7643 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7645 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7646 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7648 /* Emit hints only in the case default branch prediction
7649 heuristics would fail. */
7650 if (taken
!= cputaken
)
7652 /* We use 3e (DS) prefix for taken branches and
7653 2e (CS) prefix for not taken branches. */
7655 fputs ("ds ; ", file
);
7657 fputs ("cs ; ", file
);
7664 output_operand_lossage ("invalid operand code '%c'", code
);
7668 if (GET_CODE (x
) == REG
)
7669 print_reg (x
, code
, file
);
7671 else if (GET_CODE (x
) == MEM
)
7673 /* No `byte ptr' prefix for call instructions. */
7674 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7677 switch (GET_MODE_SIZE (GET_MODE (x
)))
7679 case 1: size
= "BYTE"; break;
7680 case 2: size
= "WORD"; break;
7681 case 4: size
= "DWORD"; break;
7682 case 8: size
= "QWORD"; break;
7683 case 12: size
= "XWORD"; break;
7684 case 16: size
= "XMMWORD"; break;
7689 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7692 else if (code
== 'w')
7694 else if (code
== 'k')
7698 fputs (" PTR ", file
);
7702 /* Avoid (%rip) for call operands. */
7703 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7704 && GET_CODE (x
) != CONST_INT
)
7705 output_addr_const (file
, x
);
7706 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7707 output_operand_lossage ("invalid constraints for operand");
7712 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7717 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7718 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7720 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7722 fprintf (file
, "0x%08lx", l
);
7725 /* These float cases don't actually occur as immediate operands. */
7726 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7730 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7731 fprintf (file
, "%s", dstr
);
7734 else if (GET_CODE (x
) == CONST_DOUBLE
7735 && GET_MODE (x
) == XFmode
)
7739 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7740 fprintf (file
, "%s", dstr
);
7745 /* We have patterns that allow zero sets of memory, for instance.
7746 In 64-bit mode, we should probably support all 8-byte vectors,
7747 since we can in fact encode that into an immediate. */
7748 if (GET_CODE (x
) == CONST_VECTOR
)
7750 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
7756 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7758 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7761 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7762 || GET_CODE (x
) == LABEL_REF
)
7764 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7767 fputs ("OFFSET FLAT:", file
);
7770 if (GET_CODE (x
) == CONST_INT
)
7771 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7773 output_pic_addr_const (file
, x
, code
);
7775 output_addr_const (file
, x
);
7779 /* Print a memory operand whose address is ADDR. */
7782 print_operand_address (FILE *file
, rtx addr
)
7784 struct ix86_address parts
;
7785 rtx base
, index
, disp
;
7787 int ok
= ix86_decompose_address (addr
, &parts
);
7792 index
= parts
.index
;
7794 scale
= parts
.scale
;
7802 if (USER_LABEL_PREFIX
[0] == 0)
7804 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
7810 if (!base
&& !index
)
7812 /* Displacement only requires special attention. */
7814 if (GET_CODE (disp
) == CONST_INT
)
7816 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
7818 if (USER_LABEL_PREFIX
[0] == 0)
7820 fputs ("ds:", file
);
7822 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
7825 output_pic_addr_const (file
, disp
, 0);
7827 output_addr_const (file
, disp
);
7829 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7832 if (GET_CODE (disp
) == CONST
7833 && GET_CODE (XEXP (disp
, 0)) == PLUS
7834 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7835 disp
= XEXP (XEXP (disp
, 0), 0);
7836 if (GET_CODE (disp
) == LABEL_REF
7837 || (GET_CODE (disp
) == SYMBOL_REF
7838 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
7839 fputs ("(%rip)", file
);
7844 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7849 output_pic_addr_const (file
, disp
, 0);
7850 else if (GET_CODE (disp
) == LABEL_REF
)
7851 output_asm_label (disp
);
7853 output_addr_const (file
, disp
);
7858 print_reg (base
, 0, file
);
7862 print_reg (index
, 0, file
);
7864 fprintf (file
, ",%d", scale
);
7870 rtx offset
= NULL_RTX
;
7874 /* Pull out the offset of a symbol; print any symbol itself. */
7875 if (GET_CODE (disp
) == CONST
7876 && GET_CODE (XEXP (disp
, 0)) == PLUS
7877 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7879 offset
= XEXP (XEXP (disp
, 0), 1);
7880 disp
= gen_rtx_CONST (VOIDmode
,
7881 XEXP (XEXP (disp
, 0), 0));
7885 output_pic_addr_const (file
, disp
, 0);
7886 else if (GET_CODE (disp
) == LABEL_REF
)
7887 output_asm_label (disp
);
7888 else if (GET_CODE (disp
) == CONST_INT
)
7891 output_addr_const (file
, disp
);
7897 print_reg (base
, 0, file
);
7900 if (INTVAL (offset
) >= 0)
7902 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7906 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7913 print_reg (index
, 0, file
);
7915 fprintf (file
, "*%d", scale
);
7923 output_addr_const_extra (FILE *file
, rtx x
)
7927 if (GET_CODE (x
) != UNSPEC
)
7930 op
= XVECEXP (x
, 0, 0);
7931 switch (XINT (x
, 1))
7933 case UNSPEC_GOTTPOFF
:
7934 output_addr_const (file
, op
);
7935 /* FIXME: This might be @TPOFF in Sun ld. */
7936 fputs ("@GOTTPOFF", file
);
7939 output_addr_const (file
, op
);
7940 fputs ("@TPOFF", file
);
7943 output_addr_const (file
, op
);
7945 fputs ("@TPOFF", file
);
7947 fputs ("@NTPOFF", file
);
7950 output_addr_const (file
, op
);
7951 fputs ("@DTPOFF", file
);
7953 case UNSPEC_GOTNTPOFF
:
7954 output_addr_const (file
, op
);
7956 fputs ("@GOTTPOFF(%rip)", file
);
7958 fputs ("@GOTNTPOFF", file
);
7960 case UNSPEC_INDNTPOFF
:
7961 output_addr_const (file
, op
);
7962 fputs ("@INDNTPOFF", file
);
7972 /* Split one or more DImode RTL references into pairs of SImode
7973 references. The RTL can be REG, offsettable MEM, integer constant, or
7974 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7975 split and "num" is its length. lo_half and hi_half are output arrays
7976 that parallel "operands". */
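/* Background note: x86 is little-endian, so the low SImode half of a DImode
   value lives at byte offset 0 and the high half at offset 4, which is why
   the MEM case below uses adjust_address with offsets 0 and 4.  */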
7979 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7983 rtx op = operands[num];
7985 /* simplify_subreg refuses to split volatile memory addresses,
7986 but we still have to handle them. */
7987 if (GET_CODE (op) == MEM)
7989 lo_half[num] = adjust_address (op, SImode, 0);
7990 hi_half[num] = adjust_address (op, SImode, 4);
7994 lo_half[num] = simplify_gen_subreg (SImode, op,
7995 GET_MODE (op) == VOIDmode
7996 ? DImode : GET_MODE (op), 0);
7997 hi_half[num] = simplify_gen_subreg (SImode, op,
7998 GET_MODE (op) == VOIDmode
7999 ? DImode : GET_MODE (op), 4);
8003 /* Split one or more TImode RTL references into pairs of DImode
8004 references. The RTL can be REG, offsettable MEM, integer constant, or
8005 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8006 split and "num" is its length. lo_half and hi_half are output arrays
8007 that parallel "operands". */
8010 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8014 rtx op = operands[num];
8016 /* simplify_subreg refuses to split volatile memory addresses, but we
8017 still have to handle them. */
8018 if (GET_CODE (op) == MEM)
8020 lo_half[num] = adjust_address (op, DImode, 0);
8021 hi_half[num] = adjust_address (op, DImode, 8);
8025 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8026 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8031 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8032 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8033 is the expression of the binary operation. The output may either be
8034 emitted here, or returned to the caller, like all output_* functions.
8036 There is no guarantee that the operands are the same mode, as they
8037 might be within FLOAT or FLOAT_EXTEND expressions. */
8039 #ifndef SYSV386_COMPAT
8040 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8041 wants to fix the assemblers because that causes incompatibility
8042 with gcc. No-one wants to fix gcc because that causes
8043 incompatibility with assemblers... You can use the option of
8044 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8045 #define SYSV386_COMPAT 1
8049 output_387_binary_op (rtx insn
, rtx
*operands
)
8051 static char buf
[30];
8054 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8056 #ifdef ENABLE_CHECKING
8057 /* Even if we do not want to check the inputs, this documents input
8058 constraints. Which helps in understanding the following code. */
8059 if (STACK_REG_P (operands
[0])
8060 && ((REG_P (operands
[1])
8061 && REGNO (operands
[0]) == REGNO (operands
[1])
8062 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
8063 || (REG_P (operands
[2])
8064 && REGNO (operands
[0]) == REGNO (operands
[2])
8065 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
8066 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8069 gcc_assert (is_sse
);
8072 switch (GET_CODE (operands
[3]))
8075 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8076 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8084 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8085 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8093 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8094 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8102 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8103 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8117 if (GET_MODE (operands
[0]) == SFmode
)
8118 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8120 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8125 switch (GET_CODE (operands
[3]))
8129 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8131 rtx temp
= operands
[2];
8132 operands
[2] = operands
[1];
8136 /* know operands[0] == operands[1]. */
8138 if (GET_CODE (operands
[2]) == MEM
)
8144 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8146 if (STACK_TOP_P (operands
[0]))
8147 /* How is it that we are storing to a dead operand[2]?
8148 Well, presumably operands[1] is dead too. We can't
8149 store the result to st(0) as st(0) gets popped on this
8150 instruction. Instead store to operands[2] (which I
8151 think has to be st(1)). st(1) will be popped later.
8152 gcc <= 2.8.1 didn't have this check and generated
8153 assembly code that the Unixware assembler rejected. */
8154 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8156 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8160 if (STACK_TOP_P (operands
[0]))
8161 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8163 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8168 if (GET_CODE (operands
[1]) == MEM
)
8174 if (GET_CODE (operands
[2]) == MEM
)
8180 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8183 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8184 derived assemblers, confusingly reverse the direction of
8185 the operation for fsub{r} and fdiv{r} when the
8186 destination register is not st(0). The Intel assembler
8187 doesn't have this brain damage. Read !SYSV386_COMPAT to
8188 figure out what the hardware really does. */
8189 if (STACK_TOP_P (operands
[0]))
8190 p
= "{p\t%0, %2|rp\t%2, %0}";
8192 p
= "{rp\t%2, %0|p\t%0, %2}";
8194 if (STACK_TOP_P (operands
[0]))
8195 /* As above for fmul/fadd, we can't store to st(0). */
8196 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8198 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8203 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8206 if (STACK_TOP_P (operands
[0]))
8207 p
= "{rp\t%0, %1|p\t%1, %0}";
8209 p
= "{p\t%1, %0|rp\t%0, %1}";
8211 if (STACK_TOP_P (operands
[0]))
8212 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8214 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8219 if (STACK_TOP_P (operands
[0]))
8221 if (STACK_TOP_P (operands
[1]))
8222 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8224 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8227 else if (STACK_TOP_P (operands
[1]))
8230 p
= "{\t%1, %0|r\t%0, %1}";
8232 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8238 p
= "{r\t%2, %0|\t%0, %2}";
8240 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8253 /* Return needed mode for entity in optimize_mode_switching pass. */
8256 ix86_mode_needed (int entity
, rtx insn
)
8258 enum attr_i387_cw mode
;
8260 /* The mode UNINITIALIZED is used to store the control word after a
8261 function call or ASM pattern. The mode ANY specifies that the function
8262 has no requirements on the control word and makes no changes in the
8263 bits we are interested in. */
8266 || (NONJUMP_INSN_P (insn
)
8267 && (asm_noperands (PATTERN (insn
)) >= 0
8268 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8269 return I387_CW_UNINITIALIZED
;
8271 if (recog_memoized (insn
) < 0)
8274 mode
= get_attr_i387_cw (insn
);
8279 if (mode
== I387_CW_TRUNC
)
8284 if (mode
== I387_CW_FLOOR
)
8289 if (mode
== I387_CW_CEIL
)
8294 if (mode
== I387_CW_MASK_PM
)
8305 /* Output code to initialize control word copies used by trunc?f?i and
8306 rounding patterns. CURRENT_MODE is set to current control word,
8307 while NEW_MODE is set to new control word. */
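/* Background note on the x87 control word bits touched below: bits 10-11
   select the rounding mode (00 = to nearest, 01 = down, 10 = up,
   11 = truncate toward zero, hence the 0x0400/0x0800/0x0c00 masks), and
   bit 5 (0x0020) masks the precision exception, used for nearbyint().  */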
8310 emit_i387_cw_initialization (int mode)
8312 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8317 rtx reg = gen_reg_rtx (HImode);
8319 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8320 emit_move_insn (reg, stored_mode);
8322 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8327 /* round toward zero (truncate) */
8328 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8329 slot = SLOT_CW_TRUNC;
8333 /* round down toward -oo */
8334 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8335 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8336 slot = SLOT_CW_FLOOR;
8340 /* round up toward +oo */
8341 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8342 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8343 slot = SLOT_CW_CEIL;
8346 case I387_CW_MASK_PM:
8347 /* mask precision exception for nearbyint() */
8348 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8349 slot = SLOT_CW_MASK_PM;
8361 /* round toward zero (truncate) */
8362 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8363 slot
= SLOT_CW_TRUNC
;
8367 /* round down toward -oo */
8368 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8369 slot
= SLOT_CW_FLOOR
;
8373 /* round up toward +oo */
8374 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8375 slot
= SLOT_CW_CEIL
;
8378 case I387_CW_MASK_PM
:
8379 /* mask precision exception for nearbyint() */
8380 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8381 slot
= SLOT_CW_MASK_PM
;
8389 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8391 new_mode
= assign_386_stack_local (HImode
, slot
);
8392 emit_move_insn (new_mode
, reg
);
8395 /* Output code for INSN to convert a float to a signed int. OPERANDS
8396 are the insn operands. The output may be [HSD]Imode and the input
8397 operand may be [SDX]Fmode. */
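/* Background note: fist/fistp round according to the current x87 control
   word, so the non-fisttp path below brackets the store with a pair of
   fldcw instructions to switch to truncation and back.  fisttp (SSE3)
   always truncates, so it needs no control-word change.  */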
8400 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8402 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8403 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8404 int round_mode
= get_attr_i387_cw (insn
);
8406 /* Jump through a hoop or two for DImode, since the hardware has no
8407 non-popping instruction. We used to do this a different way, but
8408 that was somewhat fragile and broke with post-reload splitters. */
8409 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8410 output_asm_insn ("fld\t%y1", operands
);
8412 gcc_assert (STACK_TOP_P (operands
[1]));
8413 gcc_assert (GET_CODE (operands
[0]) == MEM
);
8416 output_asm_insn ("fisttp%z0\t%0", operands
);
8419 if (round_mode
!= I387_CW_ANY
)
8420 output_asm_insn ("fldcw\t%3", operands
);
8421 if (stack_top_dies
|| dimode_p
)
8422 output_asm_insn ("fistp%z0\t%0", operands
);
8424 output_asm_insn ("fist%z0\t%0", operands
);
8425 if (round_mode
!= I387_CW_ANY
)
8426 output_asm_insn ("fldcw\t%2", operands
);
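/* Roughly, for a DImode truncation without fisttp the logic above emits a
   sequence along the lines of

        fld     %st(0)          ; only if the top of stack must survive
        fldcw   %3              ; switch to the round-toward-zero word
        fistpll %0              ; convert and pop
        fldcw   %2              ; restore the previous control word

   where the control-word operands are the stack slots prepared by
   emit_i387_cw_initialization.  */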
8432 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8433 should be used. UNORDERED_P is true when fucom should be used. */
8436 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8439 rtx cmp_op0
, cmp_op1
;
8440 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
8444 cmp_op0
= operands
[0];
8445 cmp_op1
= operands
[1];
8449 cmp_op0
= operands
[1];
8450 cmp_op1
= operands
[2];
8455 if (GET_MODE (operands
[0]) == SFmode
)
8457 return "ucomiss\t{%1, %0|%0, %1}";
8459 return "comiss\t{%1, %0|%0, %1}";
8462 return "ucomisd\t{%1, %0|%0, %1}";
8464 return "comisd\t{%1, %0|%0, %1}";
8467 gcc_assert (STACK_TOP_P (cmp_op0
));
8469 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8471 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
8475 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
8476 return TARGET_USE_FFREEP
? "ffreep\t%y1" : "fstp\t%y1";
8479 return "ftst\n\tfnstsw\t%0";
8482 if (STACK_REG_P (cmp_op1
)
8484 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8485 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8487 /* If both the top of the 387 stack dies, and the other operand
8488 is also a stack register that dies, then this must be a
8489 `fcompp' float compare */
8493 /* There is no double popping fcomi variant. Fortunately,
8494 eflags is immune from the fstp's cc clobbering. */
8496 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8498 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8499 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
8504 return "fucompp\n\tfnstsw\t%0";
8506 return "fcompp\n\tfnstsw\t%0";
8511 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8513 static const char * const alt
[16] =
8515 "fcom%z2\t%y2\n\tfnstsw\t%0",
8516 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8517 "fucom%z2\t%y2\n\tfnstsw\t%0",
8518 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8520 "ficom%z2\t%y2\n\tfnstsw\t%0",
8521 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8525 "fcomi\t{%y1, %0|%0, %y1}",
8526 "fcomip\t{%y1, %0|%0, %y1}",
8527 "fucomi\t{%y1, %0|%0, %y1}",
8528 "fucomip\t{%y1, %0|%0, %y1}",
8539 mask
= eflags_p
<< 3;
8540 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
8541 mask
|= unordered_p
<< 1;
8542 mask
|= stack_top_dies
;
8544 gcc_assert (mask
< 16);
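      /* The index into alt[] is thus a 4-bit code: bit 3 selects the fcomi
         forms, bit 2 the integer-operand (ficom) forms, bit 1 the unordered
         (fucom) forms and bit 0 the popping forms.  For example mask == 3
         (unordered compare, top of stack dies) selects
         "fucomp%z2\t%y2\n\tfnstsw\t%0".  */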
8553 ix86_output_addr_vec_elt (FILE *file
, int value
)
8555 const char *directive
= ASM_LONG
;
8559 directive
= ASM_QUAD
;
8561 gcc_assert (!TARGET_64BIT
);
8564 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8568 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8571 fprintf (file
, "%s%s%d-%s%d\n",
8572 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8573 else if (HAVE_AS_GOTOFF_IN_DATA
)
8574 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8576 else if (TARGET_MACHO
)
8578 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8579 machopic_output_function_base_name (file
);
8580 fprintf(file
, "\n");
8584 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8585 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
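/* So clearing, say, %eax normally becomes "xorl %eax, %eax" (with the flags
   clobber recorded), and only falls back to "movl $0, %eax" on targets where
   TARGET_USE_MOV0 is set and we are not optimizing for size.  */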
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
8630 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8632 int strict
= (reload_in_progress
|| reload_completed
);
8634 enum tls_model model
;
8639 if (GET_CODE (op1
) == SYMBOL_REF
)
8641 model
= SYMBOL_REF_TLS_MODEL (op1
);
8644 op1
= legitimize_tls_address (op1
, model
, true);
8645 op1
= force_operand (op1
, op0
);
8650 else if (GET_CODE (op1
) == CONST
8651 && GET_CODE (XEXP (op1
, 0)) == PLUS
8652 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
8654 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
8657 rtx addend
= XEXP (XEXP (op1
, 0), 1);
8658 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
8659 op1
= force_operand (op1
, NULL
);
8660 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
8661 op0
, 1, OPTAB_DIRECT
);
8667 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8672 rtx temp
= ((reload_in_progress
8673 || ((op0
&& GET_CODE (op0
) == REG
)
8675 ? op0
: gen_reg_rtx (Pmode
));
8676 op1
= machopic_indirect_data_reference (op1
, temp
);
8677 op1
= machopic_legitimize_pic_address (op1
, mode
,
8678 temp
== op1
? 0 : temp
);
8680 else if (MACHOPIC_INDIRECT
)
8681 op1
= machopic_indirect_data_reference (op1
, 0);
8685 if (GET_CODE (op0
) == MEM
)
8686 op1
= force_reg (Pmode
, op1
);
8688 op1
= legitimize_address (op1
, op1
, Pmode
);
8689 #endif /* TARGET_MACHO */
8693 if (GET_CODE (op0
) == MEM
8694 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8695 || !push_operand (op0
, mode
))
8696 && GET_CODE (op1
) == MEM
)
8697 op1
= force_reg (mode
, op1
);
8699 if (push_operand (op0
, mode
)
8700 && ! general_no_elim_operand (op1
, mode
))
8701 op1
= copy_to_mode_reg (mode
, op1
);
8703 /* Force large constants in 64bit compilation into register
8704 to get them CSEed. */
8705 if (TARGET_64BIT
&& mode
== DImode
8706 && immediate_operand (op1
, mode
)
8707 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
8708 && !register_operand (op0
, mode
)
8709 && optimize
&& !reload_completed
&& !reload_in_progress
)
8710 op1
= copy_to_mode_reg (mode
, op1
);
8712 if (FLOAT_MODE_P (mode
))
8714 /* If we are loading a floating point constant to a register,
8715 force the value to memory now, since we'll get better code
8716 out the back end. */
8720 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8722 op1
= validize_mem (force_const_mem (mode
, op1
));
8723 if (!register_operand (op0
, mode
))
8725 rtx temp
= gen_reg_rtx (mode
);
8726 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8727 emit_move_insn (op0
, temp
);
8734 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8738 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8740 rtx op0
= operands
[0], op1
= operands
[1];
8742 /* Force constants other than zero into memory. We do not know how
8743 the instructions used to build constants modify the upper 64 bits
8744 of the register, once we have that information we may be able
8745 to handle some of them more efficiently. */
8746 if ((reload_in_progress
| reload_completed
) == 0
8747 && register_operand (op0
, mode
)
8748 && CONSTANT_P (op1
) && op1
!= CONST0_RTX (mode
))
8749 op1
= validize_mem (force_const_mem (mode
, op1
));
8751 /* Make operand1 a register if it isn't already. */
8753 && !register_operand (op0
, mode
)
8754 && !register_operand (op1
, mode
))
8756 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
8760 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8763 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8764 straight to ix86_expand_vector_move. */
8767 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
8776 /* If we're optimizing for size, movups is the smallest. */
8779 op0
= gen_lowpart (V4SFmode
, op0
);
8780 op1
= gen_lowpart (V4SFmode
, op1
);
8781 emit_insn (gen_sse_movups (op0
, op1
));
8785 /* ??? If we have typed data, then it would appear that using
8786 movdqu is the only way to get unaligned data loaded with
8788 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
8790 op0
= gen_lowpart (V16QImode
, op0
);
8791 op1
= gen_lowpart (V16QImode
, op1
);
8792 emit_insn (gen_sse2_movdqu (op0
, op1
));
8796 if (TARGET_SSE2
&& mode
== V2DFmode
)
8800 /* When SSE registers are split into halves, we can avoid
8801 writing to the top half twice. */
8802 if (TARGET_SSE_SPLIT_REGS
)
8804 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
8809 /* ??? Not sure about the best option for the Intel chips.
8810 The following would seem to satisfy; the register is
8811 entirely cleared, breaking the dependency chain. We
8812 then store to the upper half, with a dependency depth
8813 of one. A rumor has it that Intel recommends two movsd
8814 followed by an unpacklpd, but this is unconfirmed. And
8815 given that the dependency depth of the unpacklpd would
8816 still be one, I'm not sure why this would be better. */
8817 zero
= CONST0_RTX (V2DFmode
);
8820 m
= adjust_address (op1
, DFmode
, 0);
8821 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
8822 m
= adjust_address (op1
, DFmode
, 8);
8823 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
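      /* In other words, when the target does not split SSE registers the low
         double is loaded with the upper half forced to zero (killing any
         dependency on the previous register contents), and the high double is
         then loaded on top of it, giving the dependency depth of one that the
         comment above argues for.  */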
8827 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
8828 emit_move_insn (op0
, CONST0_RTX (mode
));
8830 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
8832 if (mode
!= V4SFmode
)
8833 op0
= gen_lowpart (V4SFmode
, op0
);
8834 m
= adjust_address (op1
, V2SFmode
, 0);
8835 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
8836 m
= adjust_address (op1
, V2SFmode
, 8);
8837 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
8840 else if (MEM_P (op0
))
8842 /* If we're optimizing for size, movups is the smallest. */
8845 op0
= gen_lowpart (V4SFmode
, op0
);
8846 op1
= gen_lowpart (V4SFmode
, op1
);
8847 emit_insn (gen_sse_movups (op0
, op1
));
8851 /* ??? Similar to above, only less clear because of quote
8852 typeless stores unquote. */
8853 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
8854 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
8856 op0
= gen_lowpart (V16QImode
, op0
);
8857 op1
= gen_lowpart (V16QImode
, op1
);
8858 emit_insn (gen_sse2_movdqu (op0
, op1
));
8862 if (TARGET_SSE2
&& mode
== V2DFmode
)
8864 m
= adjust_address (op0
, DFmode
, 0);
8865 emit_insn (gen_sse2_storelpd (m
, op1
));
8866 m
= adjust_address (op0
, DFmode
, 8);
8867 emit_insn (gen_sse2_storehpd (m
, op1
));
8871 if (mode
!= V4SFmode
)
8872 op1
= gen_lowpart (V4SFmode
, op1
);
8873 m
= adjust_address (op0
, V2SFmode
, 0);
8874 emit_insn (gen_sse_storelps (m
, op1
));
8875 m
= adjust_address (op0
, V2SFmode
, 8);
8876 emit_insn (gen_sse_storehps (m
, op1
));
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}
8902 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8903 destination to use for the operation. If different from the true
8904 destination in operands[0], a copy operation will be required. */
8907 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
8910 int matching_memory
;
8911 rtx src1
, src2
, dst
;
8917 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8918 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8919 && (rtx_equal_p (dst
, src2
)
8920 || immediate_operand (src1
, mode
)))
8927 /* If the destination is memory, and we do not have matching source
8928 operands, do things in registers. */
8929 matching_memory
= 0;
8930 if (GET_CODE (dst
) == MEM
)
8932 if (rtx_equal_p (dst
, src1
))
8933 matching_memory
= 1;
8934 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8935 && rtx_equal_p (dst
, src2
))
8936 matching_memory
= 2;
8938 dst
= gen_reg_rtx (mode
);
8941 /* Both source operands cannot be in memory. */
8942 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8944 if (matching_memory
!= 2)
8945 src2
= force_reg (mode
, src2
);
8947 src1
= force_reg (mode
, src1
);
8950 /* If the operation is not commutable, source 1 cannot be a constant
8951 or non-matching memory. */
8952 if ((CONSTANT_P (src1
)
8953 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8954 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8955 src1
= force_reg (mode
, src1
);
8957 src1
= operands
[1] = src1
;
8958 src2
= operands
[2] = src2
;
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
8973 /* Attempt to expand a binary operator. Make the expansion closer to the
8974 actual machine, then just general_operand, which will allow 3 separate
8975 memory references (one output, two input) in a single insn. */
8978 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8981 rtx src1
, src2
, dst
, op
, clob
;
8983 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
8987 /* Emit the instruction. */
8989 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8990 if (reload_in_progress
)
8992 /* Reload doesn't know about the flags register, and doesn't know that
8993 it doesn't want to clobber it. We can only do this with PLUS. */
8994 gcc_assert (code
== PLUS
);
8999 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9000 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9003 /* Fix up the destination if needed. */
9004 if (dst
!= operands
[0])
9005 emit_move_insn (operands
[0], dst
);
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
                         enum machine_mode mode ATTRIBUTE_UNUSED,
                         rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;

  return 1;
}
9037 /* Attempt to expand a unary operator. Make the expansion closer to the
9038 actual machine, then just general_operand, which will allow 2 separate
9039 memory references (one output, one input) in a single insn. */
9042 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9045 int matching_memory
;
9046 rtx src
, dst
, op
, clob
;
9051 /* If the destination is memory, and we do not have matching source
9052 operands, do things in registers. */
9053 matching_memory
= 0;
9056 if (rtx_equal_p (dst
, src
))
9057 matching_memory
= 1;
9059 dst
= gen_reg_rtx (mode
);
9062 /* When source operand is memory, destination must match. */
9063 if (MEM_P (src
) && !matching_memory
)
9064 src
= force_reg (mode
, src
);
9066 /* Emit the instruction. */
9068 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9069 if (reload_in_progress
|| code
== NOT
)
9071 /* Reload doesn't know about the flags register, and doesn't know that
9072 it doesn't want to clobber it. */
9073 gcc_assert (code
== NOT
);
9078 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9079 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9082 /* Fix up the destination if needed. */
9083 if (dst
!= operands
[0])
9084 emit_move_insn (operands
[0], dst
);
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
9103 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9104 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9105 true, then replicate the mask for all elements of the vector register.
9106 If INVERT is true, then create a mask excluding the sign bit. */
9109 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9111 enum machine_mode vec_mode
;
9112 HOST_WIDE_INT hi
, lo
;
9117 /* Find the sign bit, sign extended to 2*HWI. */
9119 lo
= 0x80000000, hi
= lo
< 0;
9120 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9121 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9123 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9128 /* Force this value into the low part of a fp vector constant. */
9129 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9130 mask
= gen_lowpart (mode
, mask
);
9135 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9137 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9138 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9139 vec_mode
= V4SFmode
;
9144 v
= gen_rtvec (2, mask
, mask
);
9146 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9147 vec_mode
= V2DFmode
;
9150 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
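/* For SFmode this produces the vector { 0x80000000, 0, 0, 0 } (or the mask
   replicated into all four elements when VECT is true); with INVERT set,
   every bit except the sign bit is selected instead, which is the form the
   ABS expansion below relies on.  */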
9153 /* Generate code for floating point ABS or NEG. */
9156 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9159 rtx mask
, set
, use
, clob
, dst
, src
;
9160 bool matching_memory
;
9161 bool use_sse
= false;
9162 bool vector_mode
= VECTOR_MODE_P (mode
);
9163 enum machine_mode elt_mode
= mode
;
9167 elt_mode
= GET_MODE_INNER (mode
);
9170 else if (TARGET_SSE_MATH
)
9171 use_sse
= SSE_FLOAT_MODE_P (mode
);
9173 /* NEG and ABS performed with SSE use bitwise mask operations.
9174 Create the appropriate mask now. */
9176 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9179 /* When not using SSE, we don't use the mask, but prefer to keep the
9180 same general form of the insn pattern to reduce duplication when
9181 it comes time to split. */
9188 /* If the destination is memory, and we don't have matching source
9189 operands, do things in registers. */
9190 matching_memory
= false;
9193 if (rtx_equal_p (dst
, src
))
9194 matching_memory
= true;
9196 dst
= gen_reg_rtx (mode
);
9198 if (MEM_P (src
) && !matching_memory
)
9199 src
= force_reg (mode
, src
);
9203 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9204 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9209 set
= gen_rtx_fmt_e (code
, mode
, src
);
9210 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9211 use
= gen_rtx_USE (VOIDmode
, mask
);
9212 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9213 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (3, set
, use
, clob
)));
9216 if (dst
!= operands
[0])
9217 emit_move_insn (operands
[0], dst
);
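/* With SSE math the operation above is a plain bitwise one: NEG is computed
   as src XOR sign-bit-mask and ABS as src AND inverted-sign-bit-mask.  In the
   x87 case the mask is only carried along as a USE so that the pattern keeps
   the same shape for the later splitters.  */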
9220 /* Expand a copysign operation. Special case operand 0 being a constant. */
9223 ix86_expand_copysign (rtx operands
[])
9225 enum machine_mode mode
, vmode
;
9226 rtx dest
, op0
, op1
, mask
, nmask
;
9232 mode
= GET_MODE (dest
);
9233 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9235 if (GET_CODE (op0
) == CONST_DOUBLE
)
9239 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9240 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
9242 if (op0
== CONST0_RTX (mode
))
9243 op0
= CONST0_RTX (vmode
);
9247 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
9248 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9250 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
9251 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
9254 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9257 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
9259 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
9263 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
9264 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9267 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9269 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9273 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9274 be a constant, and so has already been expanded into a vector constant. */
9277 ix86_split_copysign_const (rtx operands
[])
9279 enum machine_mode mode
, vmode
;
9280 rtx dest
, op0
, op1
, mask
, x
;
9287 mode
= GET_MODE (dest
);
9288 vmode
= GET_MODE (mask
);
9290 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
9291 x
= gen_rtx_AND (vmode
, dest
, mask
);
9292 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9294 if (op0
!= CONST0_RTX (vmode
))
9296 x
= gen_rtx_IOR (vmode
, dest
, op0
);
9297 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9301 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9302 so we have to do two masks. */
9305 ix86_split_copysign_var (rtx operands
[])
9307 enum machine_mode mode
, vmode
;
9308 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
9311 scratch
= operands
[1];
9314 nmask
= operands
[4];
9317 mode
= GET_MODE (dest
);
9318 vmode
= GET_MODE (mask
);
9320 if (rtx_equal_p (op0
, op1
))
9322 /* Shouldn't happen often (it's useless, obviously), but when it does
9323 we'd generate incorrect code if we continue below. */
9324 emit_move_insn (dest
, op0
);
9328 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
9330 gcc_assert (REGNO (op1
) == REGNO (scratch
));
9332 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9333 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9336 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9337 x
= gen_rtx_NOT (vmode
, dest
);
9338 x
= gen_rtx_AND (vmode
, x
, op0
);
9339 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9343 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
9345 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9347 else /* alternative 2,4 */
9349 gcc_assert (REGNO (mask
) == REGNO (scratch
));
9350 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
9351 x
= gen_rtx_AND (vmode
, scratch
, op1
);
9353 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9355 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
9357 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9358 x
= gen_rtx_AND (vmode
, dest
, nmask
);
9360 else /* alternative 3,4 */
9362 gcc_assert (REGNO (nmask
) == REGNO (dest
));
9364 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9365 x
= gen_rtx_AND (vmode
, dest
, op0
);
9367 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9370 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
9371 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9374 /* Return TRUE or FALSE depending on whether the first SET in INSN
9375 has source and destination with matching CC modes, and that the
9376 CC mode is at least as constrained as REQ_MODE. */
9379 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
9382 enum machine_mode set_mode
;
9384 set
= PATTERN (insn
);
9385 if (GET_CODE (set
) == PARALLEL
)
9386 set
= XVECEXP (set
, 0, 0);
9387 gcc_assert (GET_CODE (set
) == SET
);
9388 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
9390 set_mode
= GET_MODE (SET_DEST (set
));
9394 if (req_mode
!= CCNOmode
9395 && (req_mode
!= CCmode
9396 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
9400 if (req_mode
== CCGCmode
)
9404 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
9408 if (req_mode
== CCZmode
)
9418 return (GET_MODE (SET_SRC (set
)) == set_mode
);
9421 /* Generate insn patterns to do an integer compare of OPERANDS. */
9424 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
9426 enum machine_mode cmpmode
;
9429 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
9430 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
9432 /* This is very simple, but making the interface the same as in the
9433 FP case makes the rest of the code easier. */
9434 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
9435 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
9437 /* Return the test that should be put into the flags user, i.e.
9438 the bcc, scc, or cmov instruction. */
9439 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
9442 /* Figure out whether to use ordered or unordered fp comparisons.
9443 Return the appropriate mode to use. */
9446 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
9448 /* ??? In order to make all comparisons reversible, we do all comparisons
9449 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9450 all forms trapping and nontrapping comparisons, we can make inequality
9451 comparisons trapping again, since it results in better code when using
9452 FCOM based compares. */
9453 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
9457 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
9459 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
9460 return ix86_fp_compare_mode (code
);
9463 /* Only zero flag is needed. */
9465 case NE
: /* ZF!=0 */
9467 /* Codes needing carry flag. */
9468 case GEU
: /* CF=0 */
9469 case GTU
: /* CF=0 & ZF=0 */
9470 case LTU
: /* CF=1 */
9471 case LEU
: /* CF=1 | ZF=1 */
9473 /* Codes possibly doable only with sign flag when
9474 comparing against zero. */
9475 case GE
: /* SF=OF or SF=0 */
9476 case LT
: /* SF<>OF or SF=1 */
9477 if (op1
== const0_rtx
)
9480 /* For other cases Carry flag is not required. */
9482 /* Codes doable only with sign flag when comparing
9483 against zero, but we miss jump instruction for it
9484 so we need to use relational tests against overflow
9485 that thus needs to be zero. */
9486 case GT
: /* ZF=0 & SF=OF */
9487 case LE
: /* ZF=1 | SF<>OF */
9488 if (op1
== const0_rtx
)
9492 /* strcmp pattern do (use flags) and combine may ask us for proper
9501 /* Return the fixed registers used for condition codes. */
9504 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
9511 /* If two condition code modes are compatible, return a condition code
9512 mode which is compatible with both. Otherwise, return
9515 static enum machine_mode
9516 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
9521 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
9524 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
9525 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
9553 /* These are only compatible with themselves, which we already
9559 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9562 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
9564 enum rtx_code swapped_code
= swap_condition (code
);
9565 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
9566 || (ix86_fp_comparison_cost (swapped_code
)
9567 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
9570 /* Swap, force into registers, or otherwise massage the two operands
9571 to a fp comparison. The operands are updated in place; the new
9572 comparison code is returned. */
9574 static enum rtx_code
9575 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
9577 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
9578 rtx op0
= *pop0
, op1
= *pop1
;
9579 enum machine_mode op_mode
= GET_MODE (op0
);
9580 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
9582 /* All of the unordered compare instructions only work on registers.
9583 The same is true of the fcomi compare instructions. The XFmode
9584 compare instructions require registers except when comparing
9585 against zero or when converting operand 1 from fixed point to
9589 && (fpcmp_mode
== CCFPUmode
9590 || (op_mode
== XFmode
9591 && ! (standard_80387_constant_p (op0
) == 1
9592 || standard_80387_constant_p (op1
) == 1)
9593 && GET_CODE (op1
) != FLOAT
)
9594 || ix86_use_fcomi_compare (code
)))
9596 op0
= force_reg (op_mode
, op0
);
9597 op1
= force_reg (op_mode
, op1
);
9601 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9602 things around if they appear profitable, otherwise force op0
9605 if (standard_80387_constant_p (op0
) == 0
9606 || (GET_CODE (op0
) == MEM
9607 && ! (standard_80387_constant_p (op1
) == 0
9608 || GET_CODE (op1
) == MEM
)))
9611 tmp
= op0
, op0
= op1
, op1
= tmp
;
9612 code
= swap_condition (code
);
9615 if (GET_CODE (op0
) != REG
)
9616 op0
= force_reg (op_mode
, op0
);
9618 if (CONSTANT_P (op1
))
9620 int tmp
= standard_80387_constant_p (op1
);
9622 op1
= validize_mem (force_const_mem (op_mode
, op1
));
9626 op1
= force_reg (op_mode
, op1
);
9629 op1
= force_reg (op_mode
, op1
);
9633 /* Try to rearrange the comparison to make it cheaper. */
9634 if (ix86_fp_comparison_cost (code
)
9635 > ix86_fp_comparison_cost (swap_condition (code
))
9636 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
9639 tmp
= op0
, op0
= op1
, op1
= tmp
;
9640 code
= swap_condition (code
);
9641 if (GET_CODE (op0
) != REG
)
9642 op0
= force_reg (op_mode
, op0
);
9650 /* Convert comparison codes we use to represent FP comparison to integer
9651 code that will result in proper branch. Return UNKNOWN if no such code
9655 ix86_fp_compare_code_to_integer (enum rtx_code code
)
9684 /* Split comparison code CODE into comparisons we can do using branch
9685 instructions. BYPASS_CODE is comparison code for branch that will
9686 branch around FIRST_CODE and SECOND_CODE. If some of branches
9687 is not required, set value to UNKNOWN.
9688 We never require more than two branches. */
9691 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
9692 enum rtx_code
*first_code
,
9693 enum rtx_code
*second_code
)
9696 *bypass_code
= UNKNOWN
;
9697 *second_code
= UNKNOWN
;
  /* The fcomi comparison sets flags as follows:

        cmp    ZF PF CF
        >      0  0  0
        <      0  0  1
        =      1  0  0
        un     1  1  1
   */

  switch (code)
    {
9709 case GT
: /* GTU - CF=0 & ZF=0 */
9710 case GE
: /* GEU - CF=0 */
9711 case ORDERED
: /* PF=0 */
9712 case UNORDERED
: /* PF=1 */
9713 case UNEQ
: /* EQ - ZF=1 */
9714 case UNLT
: /* LTU - CF=1 */
9715 case UNLE
: /* LEU - CF=1 | ZF=1 */
9716 case LTGT
: /* EQ - ZF=0 */
9718 case LT
: /* LTU - CF=1 - fails on unordered */
9720 *bypass_code
= UNORDERED
;
9722 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
9724 *bypass_code
= UNORDERED
;
9726 case EQ
: /* EQ - ZF=1 - fails on unordered */
9728 *bypass_code
= UNORDERED
;
9730 case NE
: /* NE - ZF=0 - fails on unordered */
9732 *second_code
= UNORDERED
;
9734 case UNGE
: /* GEU - CF=0 - fails on unordered */
9736 *second_code
= UNORDERED
;
9738 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
9740 *second_code
= UNORDERED
;
9745 if (!TARGET_IEEE_FP
)
9747 *second_code
= UNKNOWN
;
9748 *bypass_code
= UNKNOWN
;
/* Return cost of a comparison done using fcom + arithmetic operations on AX.
   All of the following functions use the number of instructions as a cost
   metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */
9757 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
9759 if (!TARGET_IEEE_FP
)
9761 /* The cost of code output by ix86_expand_fp_compare. */
9789 /* Return cost of comparison done using fcomi operation.
9790 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9792 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
9794 enum rtx_code bypass_code
, first_code
, second_code
;
9795 /* Return arbitrarily high cost when instruction is not supported - this
9796 prevents gcc from using it. */
9799 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9800 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
9803 /* Return cost of comparison done using sahf operation.
9804 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9806 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
9808 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return arbitrarily high cost when the instruction is not preferred - this
     prevents gcc from using it.  */
9811 if (!TARGET_USE_SAHF
&& !optimize_size
)
9813 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9814 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
9817 /* Compute cost of the comparison done using any method.
9818 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9820 ix86_fp_comparison_cost (enum rtx_code code
)
9822 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
9825 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
9826 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
9828 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
9829 if (min
> sahf_cost
)
9831 if (min
> fcomi_cost
)
9836 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9839 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
9840 rtx
*second_test
, rtx
*bypass_test
)
9842 enum machine_mode fpcmp_mode
, intcmp_mode
;
9844 int cost
= ix86_fp_comparison_cost (code
);
9845 enum rtx_code bypass_code
, first_code
, second_code
;
9847 fpcmp_mode
= ix86_fp_compare_mode (code
);
9848 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
9851 *second_test
= NULL_RTX
;
9853 *bypass_test
= NULL_RTX
;
9855 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9857 /* Do fcomi/sahf based test when profitable. */
9858 if ((bypass_code
== UNKNOWN
|| bypass_test
)
9859 && (second_code
== UNKNOWN
|| second_test
)
9860 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
9864 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9865 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
9871 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9872 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9874 scratch
= gen_reg_rtx (HImode
);
9875 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9876 emit_insn (gen_x86_sahf_1 (scratch
));
9879 /* The FP codes work out to act like unsigned. */
9880 intcmp_mode
= fpcmp_mode
;
9882 if (bypass_code
!= UNKNOWN
)
9883 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
9884 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9886 if (second_code
!= UNKNOWN
)
9887 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
9888 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9893 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9894 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9895 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9897 scratch
= gen_reg_rtx (HImode
);
9898 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9900 /* In the unordered case, we have to check C2 for NaN's, which
9901 doesn't happen to work out to anything nice combination-wise.
9902 So do some bit twiddling on the value we've got in AH to come
9903 up with an appropriate set of condition codes. */
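      /* After the fnstsw above, the x87 condition bits end up in AH with
         C0 at 0x01, C2 at 0x04 and C3 at 0x40; hence 0x45 tests C0|C2|C3,
         0x05 tests C0|C2 and 0x40 tests C3 alone in the masks below.  */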
9905 intcmp_mode
= CCNOmode
;
9910 if (code
== GT
|| !TARGET_IEEE_FP
)
9912 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9917 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9918 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9919 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
9920 intcmp_mode
= CCmode
;
9926 if (code
== LT
&& TARGET_IEEE_FP
)
9928 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9929 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
9930 intcmp_mode
= CCmode
;
9935 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
9941 if (code
== GE
|| !TARGET_IEEE_FP
)
9943 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
9948 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9949 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9956 if (code
== LE
&& TARGET_IEEE_FP
)
9958 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9959 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9960 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9961 intcmp_mode
= CCmode
;
9966 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9972 if (code
== EQ
&& TARGET_IEEE_FP
)
9974 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9975 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9976 intcmp_mode
= CCmode
;
9981 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9988 if (code
== NE
&& TARGET_IEEE_FP
)
9990 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9991 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9997 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10003 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10007 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10012 gcc_unreachable ();
10016 /* Return the test that should be put into the flags user, i.e.
10017 the bcc, scc, or cmov instruction. */
10018 return gen_rtx_fmt_ee (code
, VOIDmode
,
10019 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10024 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10027 op0
= ix86_compare_op0
;
10028 op1
= ix86_compare_op1
;
10031 *second_test
= NULL_RTX
;
10033 *bypass_test
= NULL_RTX
;
10035 if (ix86_compare_emitted
)
10037 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10038 ix86_compare_emitted
= NULL_RTX
;
10040 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10041 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10042 second_test
, bypass_test
);
10044 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10049 /* Return true if the CODE will result in nontrivial jump sequence. */
10051 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10053 enum rtx_code bypass_code
, first_code
, second_code
;
10056 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10057 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10061 ix86_expand_branch (enum rtx_code code
, rtx label
)
10065 switch (GET_MODE (ix86_compare_op0
))
10071 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10072 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10073 gen_rtx_LABEL_REF (VOIDmode
, label
),
10075 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10084 enum rtx_code bypass_code
, first_code
, second_code
;
10086 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10087 &ix86_compare_op1
);
10089 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10091 /* Check whether we will use the natural sequence with one jump. If
10092 so, we can expand jump early. Otherwise delay expansion by
10093 creating compound insn to not confuse optimizers. */
10094 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10097 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10098 gen_rtx_LABEL_REF (VOIDmode
, label
),
10099 pc_rtx
, NULL_RTX
, NULL_RTX
);
10103 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10104 ix86_compare_op0
, ix86_compare_op1
);
10105 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10106 gen_rtx_LABEL_REF (VOIDmode
, label
),
10108 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10110 use_fcomi
= ix86_use_fcomi_compare (code
);
10111 vec
= rtvec_alloc (3 + !use_fcomi
);
10112 RTVEC_ELT (vec
, 0) = tmp
;
10114 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10116 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10119 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10121 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10130 /* Expand DImode branch into multiple compare+branch. */
10132 rtx lo
[2], hi
[2], label2
;
10133 enum rtx_code code1
, code2
, code3
;
10134 enum machine_mode submode
;
10136 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10138 tmp
= ix86_compare_op0
;
10139 ix86_compare_op0
= ix86_compare_op1
;
10140 ix86_compare_op1
= tmp
;
10141 code
= swap_condition (code
);
10143 if (GET_MODE (ix86_compare_op0
) == DImode
)
10145 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10146 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10151 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10152 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10156 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10157 avoid two branches. This costs one extra insn, so disable when
10158 optimizing for size. */
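      /* Roughly, "a == b" for a DImode pair on a 32-bit target then becomes

                xorl    hi(b), hi(a)
                xorl    lo(b), lo(a)
                orl     hi(a), lo(a)

         followed by a single branch on the zero flag, instead of two
         compare-and-branch pairs.  */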
10160 if ((code
== EQ
|| code
== NE
)
10162 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10167 if (hi
[1] != const0_rtx
)
10168 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10169 NULL_RTX
, 0, OPTAB_WIDEN
);
10172 if (lo
[1] != const0_rtx
)
10173 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10174 NULL_RTX
, 0, OPTAB_WIDEN
);
10176 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10177 NULL_RTX
, 0, OPTAB_WIDEN
);
10179 ix86_compare_op0
= tmp
;
10180 ix86_compare_op1
= const0_rtx
;
10181 ix86_expand_branch (code
, label
);
10185 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10186 op1 is a constant and the low word is zero, then we can just
10187 examine the high word. */
10189 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
10192 case LT
: case LTU
: case GE
: case GEU
:
10193 ix86_compare_op0
= hi
[0];
10194 ix86_compare_op1
= hi
[1];
10195 ix86_expand_branch (code
, label
);
10201 /* Otherwise, we need two or three jumps. */
10203 label2
= gen_label_rtx ();
10206 code2
= swap_condition (code
);
10207 code3
= unsigned_condition (code
);
10211 case LT
: case GT
: case LTU
: case GTU
:
10214 case LE
: code1
= LT
; code2
= GT
; break;
10215 case GE
: code1
= GT
; code2
= LT
; break;
10216 case LEU
: code1
= LTU
; code2
= GTU
; break;
10217 case GEU
: code1
= GTU
; code2
= LTU
; break;
10219 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10220 case NE
: code2
= UNKNOWN
; break;
10223 gcc_unreachable ();
10228 * if (hi(a) < hi(b)) goto true;
10229 * if (hi(a) > hi(b)) goto false;
10230 * if (lo(a) < lo(b)) goto true;
10234 ix86_compare_op0
= hi
[0];
10235 ix86_compare_op1
= hi
[1];
10237 if (code1
!= UNKNOWN
)
10238 ix86_expand_branch (code1
, label
);
10239 if (code2
!= UNKNOWN
)
10240 ix86_expand_branch (code2
, label2
);
10242 ix86_compare_op0
= lo
[0];
10243 ix86_compare_op1
= lo
[1];
10244 ix86_expand_branch (code3
, label
);
10246 if (code2
!= UNKNOWN
)
10247 emit_label (label2
);
10252 gcc_unreachable ();
10256 /* Split branch based on floating point condition. */
10258 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
10259 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
10261 rtx second
, bypass
;
10262 rtx label
= NULL_RTX
;
10264 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
10267 if (target2
!= pc_rtx
)
10270 code
= reverse_condition_maybe_unordered (code
);
10275 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
10276 tmp
, &second
, &bypass
);
10278 /* Remove pushed operand from stack. */
10280 ix86_free_from_memory (GET_MODE (pushed
));
10282 if (split_branch_probability
>= 0)
10284 /* Distribute the probabilities across the jumps.
10285 Assume the BYPASS and SECOND to be always test
10287 probability
= split_branch_probability
;
10289 /* Value of 1 is low enough to make no need for probability
10290 to be updated. Later we may run some experiments and see
10291 if unordered values are more frequent in practice. */
10293 bypass_probability
= 1;
10295 second_probability
= 1;
10297 if (bypass
!= NULL_RTX
)
10299 label
= gen_label_rtx ();
10300 i
= emit_jump_insn (gen_rtx_SET
10302 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10304 gen_rtx_LABEL_REF (VOIDmode
,
10307 if (bypass_probability
>= 0)
10309 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10310 GEN_INT (bypass_probability
),
10313 i
= emit_jump_insn (gen_rtx_SET
10315 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10316 condition
, target1
, target2
)));
10317 if (probability
>= 0)
10319 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10320 GEN_INT (probability
),
10322 if (second
!= NULL_RTX
)
10324 i
= emit_jump_insn (gen_rtx_SET
10326 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
10328 if (second_probability
>= 0)
10330 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10331 GEN_INT (second_probability
),
10334 if (label
!= NULL_RTX
)
10335 emit_label (label
);
10339 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
10341 rtx ret
, tmp
, tmpreg
, equiv
;
10342 rtx second_test
, bypass_test
;
10344 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
10345 return 0; /* FAIL */
10347 gcc_assert (GET_MODE (dest
) == QImode
);
10349 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10350 PUT_MODE (ret
, QImode
);
10355 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
10356 if (bypass_test
|| second_test
)
10358 rtx test
= second_test
;
10360 rtx tmp2
= gen_reg_rtx (QImode
);
10363 gcc_assert (!second_test
);
10364 test
= bypass_test
;
10366 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
10368 PUT_MODE (test
, QImode
);
10369 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
10372 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
10374 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
10377 /* Attach a REG_EQUAL note describing the comparison result. */
10378 if (ix86_compare_op0
&& ix86_compare_op1
)
10380 equiv
= simplify_gen_relational (code
, QImode
,
10381 GET_MODE (ix86_compare_op0
),
10382 ix86_compare_op0
, ix86_compare_op1
);
10383 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
10386 return 1; /* DONE */
10389 /* Expand comparison setting or clearing carry flag. Return true when
10390 successful and set pop for the operation. */
10392 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10394 enum machine_mode mode
=
10395 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
  /* Do not handle DImode compares, which go through a special path.  Also we
     can't deal with FP compares yet.  This is possible to add.  */
10399 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10401 if (FLOAT_MODE_P (mode
))
10403 rtx second_test
= NULL
, bypass_test
= NULL
;
10404 rtx compare_op
, compare_seq
;
10406 /* Shortcut: following common codes never translate into carry flag compares. */
10407 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10408 || code
== ORDERED
|| code
== UNORDERED
)
10411 /* These comparisons require zero flag; swap operands so they won't. */
10412 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10413 && !TARGET_IEEE_FP
)
10418 code
= swap_condition (code
);
      /* Try to expand the comparison and verify that we end up with a carry
         flag based comparison.  This fails only when we decide to expand the
         comparison using arithmetic, which is not a common scenario.  */
10425 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10426 &second_test
, &bypass_test
);
10427 compare_seq
= get_insns ();
10430 if (second_test
|| bypass_test
)
10432 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10433 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10434 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10436 code
= GET_CODE (compare_op
);
10437 if (code
!= LTU
&& code
!= GEU
)
10439 emit_insn (compare_seq
);
10443 if (!INTEGRAL_MODE_P (mode
))
10451 /* Convert a==0 into (unsigned)a<1. */
10454 if (op1
!= const0_rtx
)
10457 code
= (code
== EQ
? LTU
: GEU
);
10460 /* Convert a>b into b<a or a>=b-1. */
10463 if (GET_CODE (op1
) == CONST_INT
)
10465 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
10466 /* Bail out on overflow. We still can swap operands but that
10467 would force loading of the constant into register. */
10468 if (op1
== const0_rtx
10469 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
10471 code
= (code
== GTU
? GEU
: LTU
);
10478 code
= (code
== GTU
? LTU
: GEU
);
10482 /* Convert a>=0 into (unsigned)a<0x80000000. */
10485 if (mode
== DImode
|| op1
!= const0_rtx
)
10487 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10488 code
= (code
== LT
? GEU
: LTU
);
10492 if (mode
== DImode
|| op1
!= constm1_rtx
)
10494 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10495 code
= (code
== LE
? GEU
: LTU
);
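      /* For example, in SImode the signed test "a >= 0" is rewritten as the
         unsigned test "(unsigned) a < 0x80000000", and "a < 0" as
         "(unsigned) a >= 0x80000000", so the result is carried entirely by
         the carry flag as required.  */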
10501 /* Swapping operands may cause constant to appear as first operand. */
10502 if (!nonimmediate_operand (op0
, VOIDmode
))
10504 if (no_new_pseudos
)
10506 op0
= force_reg (mode
, op0
);
10508 ix86_compare_op0
= op0
;
10509 ix86_compare_op1
= op1
;
10510 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
10511 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
10516 ix86_expand_int_movcc (rtx operands
[])
10518 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
10519 rtx compare_seq
, compare_op
;
10520 rtx second_test
, bypass_test
;
10521 enum machine_mode mode
= GET_MODE (operands
[0]);
10522 bool sign_bit_compare_p
= false;;
10525 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10526 compare_seq
= get_insns ();
10529 compare_code
= GET_CODE (compare_op
);
10531 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
10532 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
10533 sign_bit_compare_p
= true;
10535 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10536 HImode insns, we'd be swallowed in word prefix ops. */
10538 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
10539 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
10540 && GET_CODE (operands
[2]) == CONST_INT
10541 && GET_CODE (operands
[3]) == CONST_INT
)
10543 rtx out
= operands
[0];
10544 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
10545 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
10546 HOST_WIDE_INT diff
;
10549 /* Sign bit compares are better done using shifts than we do by using
10551 if (sign_bit_compare_p
10552 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10553 ix86_compare_op1
, &compare_op
))
10555 /* Detect overlap between destination and compare sources. */
10558 if (!sign_bit_compare_p
)
10560 bool fpcmp
= false;
10562 compare_code
= GET_CODE (compare_op
);
10564 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10565 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10568 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
10571 /* To simplify rest of code, restrict to the GEU case. */
10572 if (compare_code
== LTU
)
10574 HOST_WIDE_INT tmp
= ct
;
10577 compare_code
= reverse_condition (compare_code
);
10578 code
= reverse_condition (code
);
10583 PUT_CODE (compare_op
,
10584 reverse_condition_maybe_unordered
10585 (GET_CODE (compare_op
)));
10587 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10591 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
10592 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
10593 tmp
= gen_reg_rtx (mode
);
10595 if (mode
== DImode
)
10596 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
10598 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
10602 if (code
== GT
|| code
== GE
)
10603 code
= reverse_condition (code
);
10606 HOST_WIDE_INT tmp
= ct
;
10611 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
10612 ix86_compare_op1
, VOIDmode
, 0, -1);
10625 tmp
= expand_simple_binop (mode
, PLUS
,
10627 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10638 tmp
= expand_simple_binop (mode
, IOR
,
10640 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10642 else if (diff
== -1 && ct
)
10652 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10654 tmp
= expand_simple_binop (mode
, PLUS
,
10655 copy_rtx (tmp
), GEN_INT (cf
),
10656 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10664 * andl cf - ct, dest
10674 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10677 tmp
= expand_simple_binop (mode
, AND
,
10679 gen_int_mode (cf
- ct
, mode
),
10680 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10682 tmp
= expand_simple_binop (mode
, PLUS
,
10683 copy_rtx (tmp
), GEN_INT (ct
),
10684 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10687 if (!rtx_equal_p (tmp
, out
))
10688 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
10690 return 1; /* DONE */
10696 tmp
= ct
, ct
= cf
, cf
= tmp
;
10698 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10700 /* We may be reversing unordered compare to normal compare, that
10701 is not valid in general (we may convert non-trapping condition
10702 to trapping one), however on i386 we currently emit all
10703 comparisons unordered. */
10704 compare_code
= reverse_condition_maybe_unordered (compare_code
);
10705 code
= reverse_condition_maybe_unordered (code
);
10709 compare_code
= reverse_condition (compare_code
);
10710 code
= reverse_condition (code
);
10714 compare_code
= UNKNOWN
;
10715 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
10716 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
10718 if (ix86_compare_op1
== const0_rtx
10719 && (code
== LT
|| code
== GE
))
10720 compare_code
= code
;
10721 else if (ix86_compare_op1
== constm1_rtx
)
10725 else if (code
== GT
)
10730 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10731 if (compare_code
!= UNKNOWN
10732 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
10733 && (cf
== -1 || ct
== -1))
10735 /* If lea code below could be used, only optimize
10736 if it results in a 2 insn sequence. */
10738 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10739 || diff
== 3 || diff
== 5 || diff
== 9)
10740 || (compare_code
== LT
&& ct
== -1)
10741 || (compare_code
== GE
&& cf
== -1))
10744 * notl op1 (if necessary)
10752 code
= reverse_condition (code
);
10755 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10756 ix86_compare_op1
, VOIDmode
, 0, -1);
10758 out
= expand_simple_binop (mode
, IOR
,
10760 out
, 1, OPTAB_DIRECT
);
10761 if (out
!= operands
[0])
10762 emit_move_insn (operands
[0], out
);
10764 return 1; /* DONE */
      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
          /*
           * lea cf(dest*(ct-cf)),dest
           *
           * This also catches the degenerate setcc-only case.
           */
          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          tmp = copy_rtx (out);
          out1 = copy_rtx (out);
          tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
          tmp = gen_rtx_PLUS (mode, tmp, out1);
          tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
          if (!rtx_equal_p (tmp, out))
            out = force_operand (tmp, copy_rtx (out));
          emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));

          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       */
      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST >= 2)
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            /* We may be reversing unordered compare to normal compare,
               that is not valid in general (we may convert non-trapping
               condition to trapping one), however on i386 we currently
               emit all comparisons unordered.  */
            code = reverse_condition_maybe_unordered (code);
              code = reverse_condition (code);
              if (compare_code != UNKNOWN)
                compare_code = reverse_condition (compare_code);

          if (compare_code != UNKNOWN)
              /* notl op1 (if needed)
                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */
              if (compare_code == GE || !cf)
                  code = reverse_condition (code);
                  HOST_WIDE_INT tmp = cf;

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            out = emit_store_flag (out, code, ix86_compare_op0,
                                   ix86_compare_op1, VOIDmode, 0, 1);

          out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
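
/* Illustrative sketch (not part of GCC): the jumpless column of the comment
   above (xorl/cmpl/setcc/decl/andl, followed by the final add of ct) computes
   a branchless select between two constants.  The helper name is
   hypothetical; `cond' stands for the comparison result.  */
#if 0
static int
setcc_dec_and_select (int cond, int ct, int cf)
{
  int dest = (cond != 0);      /* xorl dest,dest; cmpl; setcc  -> 0 or 1.   */
  dest -= 1;                   /* decl dest  -> 0 when true, -1 when false. */
  dest &= cf - ct;             /* andl (cf-ct),dest  -> 0 or cf-ct.         */
  return dest + ct;            /* addl ct  -> ct when true, cf when false.  */
}
#endif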
  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
      /* Try a few things more with specific constants and a variable.  */
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
            return 0; /* FAIL */
      else if (GET_CODE (operands[3]) == CONST_INT)
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
            return 0; /* FAIL */
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
  /* For comparison with above,  */
  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);

  if (! register_operand (operands[2], VOIDmode)
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  copy_rtx (operands[3]),
                                                  copy_rtx (operands[0]))));
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  copy_rtx (operands[2]),
                                                  copy_rtx (operands[0]))));

  return 1; /* DONE */
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      /* These are supported directly.  */
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
      /* These are not supported directly.  Swap the comparison operands
         to transform into something that is supported.  */
      code = swap_condition (code);
      gcc_unreachable ();
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
  enum machine_mode mode;

  else if (code == UNGE)
      if_true = if_false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
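
/* Illustrative sketch (not part of GCC): a scalar picture of why the operand
   order matters for the min/max detection above.  SSE minss computes
   a < b ? a : b and yields the second operand whenever either input is a NaN
   (and decides +0/-0 by the same comparison), so if_true/if_false must not be
   interchanged without changing behaviour.  The helper name is hypothetical.  */
#if 0
static float
sse_minss_semantics (float a, float b)
{
  return a < b ? a : b;   /* A NaN in either operand yields b.  */
}
#endif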
/* Expand an sse vector comparison.  Return the register with the result.  */

ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
  enum machine_mode mode = GET_MODE (dest);

    cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
  enum machine_mode mode = GET_MODE (dest);

  if (op_false == CONST0_RTX (mode))
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
  else if (op_true == CONST0_RTX (mode))
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
        t3 = gen_reg_rtx (mode);

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
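
/* Illustrative sketch (not part of GCC): the general case above is the
   classic mask-based select, shown here on a scalar word.  `mask' is all
   ones where the comparison was true and all zeros elsewhere.  The helper
   name is hypothetical.  */
#if 0
static unsigned
mask_select_sketch (unsigned mask, unsigned op_true, unsigned op_false)
{
  return (op_true & mask) | (op_false & ~mask);   /* and / andn / or  */
}
#endif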
/* Expand a floating-point conditional move.  Return true if successful.  */

ix86_expand_fp_movcc (rtx operands[])
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (ix86_compare_op1);

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                               &ix86_compare_op1);
      if (code == UNKNOWN)

      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
                                     ix86_compare_op1, operands[2],

      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
                                 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, bypass_test,
                                                  operands[3], operands[0])));
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, second_test,
                                                  operands[2], operands[0])));
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

ix86_expand_fp_vcond (rtx operands[])
  enum rtx_code code = GET_CODE (operands[3]);

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Expand a signed integral vector conditional move.  */

ix86_expand_int_vcond (rtx operands[])
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  */
      code = reverse_condition (code);
      code = reverse_condition (code);
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      gcc_unreachable ();

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
          /* Perform a parallel modulo subtraction.  */
          t1 = gen_reg_rtx (mode);
          emit_insn (gen_subv4si3 (t1, cop0, cop1));

          /* Extract the original sign bit of op0.  */
          mask = GEN_INT (-0x80000000);
          mask = gen_rtx_CONST_VECTOR (mode,
                                       gen_rtvec (4, mask, mask, mask, mask));
          mask = force_reg (mode, mask);
          t2 = gen_reg_rtx (mode);
          emit_insn (gen_andv4si3 (t2, cop0, mask));

          /* XOR it back into the result of the subtraction.  This results
             in the sign bit set iff we saw unsigned underflow.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_xorv4si3 (x, t1, t2));

          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, cop0, cop1)));

          gcc_unreachable ();

      cop1 = CONST0_RTX (mode);

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                           operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
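
/* Illustrative sketch (not part of GCC): the V4SI path above derives the
   unsigned GTU comparison from a modular subtraction and the sign bit of
   op0, because the hardware only provides signed pcmpgt.  A more familiar
   scalar analogue of reducing unsigned order to signed order is the sign-bit
   flip shown below; it is an analogue, not the literal vector sequence.
   The helper name is hypothetical.  */
#if 0
static int
unsigned_gt_via_signed (unsigned a, unsigned b)
{
  /* a > b (unsigned) iff (a ^ 0x80000000) > (b ^ 0x80000000) (signed).  */
  return (int) (a ^ 0x80000000u) > (int) (b ^ 0x80000000u);
}
#endif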
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
ix86_expand_int_addcc (rtx operands[])
  enum rtx_code code = GET_CODE (operands[1]);
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);

        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
      switch (GET_MODE (operands[0]))
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          gcc_unreachable ();
      switch (GET_MODE (operands[0]))
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          gcc_unreachable ();

  return 1; /* DONE */
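
/* Illustrative sketch (not part of GCC): the adc/sbb idiom used above.
   After a compare that leaves the unsigned "borrow" in the carry flag,
   adc $0, dest adds the condition into dest (and sbb subtracts it), so a
   conditional increment needs no branch and no setcc/cmov.  The helper name
   is hypothetical.  */
#if 0
static int
conditional_increment_sketch (int dest, unsigned a, unsigned b)
{
  /* cmp b, a sets CF when a < b (unsigned); adc $0, dest folds it in.  */
  return dest + (a < b);
}
#endif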
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
      rtx tmp = maybe_get_pool_constant (operand);

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;

  if (GET_CODE (operand) == CONST_VECTOR)
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);

      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
          if (REG_P (operand))
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
          else if (offsettable_memref_p (operand))
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
                parts[2] = adjust_address (operand, SImode, 8);
          else if (GET_CODE (operand) == CONST_DOUBLE)
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  gcc_unreachable ();
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            gcc_unreachable ();

      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
          if (REG_P (operand))
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
          else if (offsettable_memref_p (operand))
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
          else if (GET_CODE (operand) == CONST_DOUBLE)
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                  ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                   + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                  ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                   + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                parts[1] = immed_double_const (l[2], l[3], DImode);
            gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

ix86_split_long_move (rtx operands[])
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    gcc_assert (GET_CODE (operands[0]) != MEM
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));

          if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
            emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
          emit_move_insn (part[0][2], part[1][2]);

          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these come from an attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
              switch (GET_CODE (part[1][1]))
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  gcc_unreachable ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
      if (GET_CODE (operands[5]) == CONST_INT
          && operands[5] != const0_rtx
          && REG_P (operands[2]))
          if (GET_CODE (operands[6]) == CONST_INT
              && INTVAL (operands[6]) == INTVAL (operands[5]))
            operands[6] = operands[2];
              && GET_CODE (operands[7]) == CONST_INT
              && INTVAL (operands[7]) == INTVAL (operands[5]))
            operands[7] = operands[2];
          && GET_CODE (operands[6]) == CONST_INT
          && operands[6] != const0_rtx
          && REG_P (operands[3])
          && GET_CODE (operands[7]) == CONST_INT
          && INTVAL (operands[7]) == INTVAL (operands[6]))
        operands[7] = operands[3];

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
    emit_move_insn (operands[4], operands[7]);
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
      emit_insn ((mode == DImode
                  : gen_adddi3) (operand, operand, operand));
  else if (!optimize_size
           && count * ix86_cost->add <= ix86_cost->shift_const)
      for (i = 0; i < count; i++)
          emit_insn ((mode == DImode
                      : gen_adddi3) (operand, operand, operand));
    emit_insn ((mode == DImode
                : gen_ashldi3) (operand, operand, GEN_INT (count)));
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
  rtx low[2], high[2];
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > single_width)
            ix86_expand_ashl_const (high[0], count - single_width, mode);

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          /* Otherwise, we can get the same results by manually performing
             a bit extract operation on bit 5/6, and then performing the two
             shifts.  The two methods of getting 0/1 into low/high are exactly
             the same size.  Avoiding the shift in the bit extract case helps
             pentium4 a bit; no one else seems to care much either way.  */
          if (TARGET_PARTIAL_REG_STALL && !optimize_size)
            x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
            x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn ((mode == DImode
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
          emit_insn ((mode == DImode
                      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
          emit_move_insn (low[0], high[0]);
          emit_insn ((mode == DImode
                      : gen_xordi3) (low[0], low[0], GEN_INT (1)));

      emit_insn ((mode == DImode
                  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
                  : gen_ashldi3) (high[0], high[0], operands[2]));

  if (operands[1] == constm1_rtx)
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
        emit_move_insn (high[0], low[0]);
        emit_move_insn (high[0], constm1_rtx);

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
                  : gen_x86_64_shld) (high[0], low[0], operands[2]));

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
                  ? gen_x86_shift_adj_1
                  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
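
/* Illustrative sketch (not part of GCC): the double-word left shift that the
   splitter above implements with shld + shl, including the count >= 32 case
   where the low word simply moves into the high word.  Assumes a 32-bit word
   and 0 <= count < 64; the helper name is hypothetical.  */
#if 0
static void
dword_ashl_sketch (unsigned *low, unsigned *high, unsigned count)
{
  if (count >= 32)
    {
      *high = *low << (count - 32);   /* low word moves up, then shifts.  */
      *low = 0;
    }
  else if (count != 0)
    {
      *high = (*high << count) | (*low >> (32 - count));   /* shld  */
      *low <<= count;                                      /* shl   */
    }
}
#endif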
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
  rtx low[2], high[2];
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
          emit_move_insn (high[0], high[1]);
          emit_insn ((mode == DImode
                      : gen_ashrdi3) (high[0], high[0],
                                      GEN_INT (single_width - 1)));
          emit_move_insn (low[0], high[0]);

      else if (count >= single_width)
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn ((mode == DImode
                      : gen_ashrdi3) (high[0], high[0],
                                      GEN_INT (single_width - 1)));
          if (count > single_width)
            emit_insn ((mode == DImode
                        : gen_ashrdi3) (low[0], low[0],
                                        GEN_INT (count - single_width)));

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
          emit_insn ((mode == DImode
                      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
                  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
          emit_move_insn (scratch, high[0]);
          emit_insn ((mode == DImode
                      : gen_ashrdi3) (scratch, scratch,
                                      GEN_INT (single_width - 1)));
          emit_insn ((mode == DImode
                      ? gen_x86_shift_adj_1
                      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
  rtx low[2], high[2];
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > single_width)
            emit_insn ((mode == DImode
                        : gen_lshrdi3) (low[0], low[0],
                                        GEN_INT (count - single_width)));

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
          emit_insn ((mode == DImode
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
                  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
          ix86_expand_clear (scratch);
          emit_insn ((mode == DImode
                      ? gen_x86_shift_adj_1
                      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If true, jump to the label.  */
ix86_expand_aligntest (rtx variable, int value)
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
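
/* Illustrative usage sketch (not part of GCC): this is the pattern the
   string expanders below follow.  The label is taken when DESTREG is already
   aligned, so the single-byte fixup and counter adjustment are skipped.  */
#if 0
  rtx label = ix86_expand_aligntest (destreg, 1);
  /* ...emit the one-byte move for the unaligned case here...  */
  ix86_adjust_counter (countreg, 1);
  emit_label (label);
  LABEL_NUSES (label) = 1;
#endif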
/* Adjust COUNTER by the VALUE.  */
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero extend possibly SImode EXP to Pmode register.  */
ix86_zero_extend_to_Pmode (rtx exp)
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
12227 /* Expand string move (memcpy) operation. Use i386 string operations when
12228 profitable. expand_clrmem contains similar code. */
12230 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
12232 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
12233 enum machine_mode counter_mode
;
12234 HOST_WIDE_INT align
= 0;
12235 unsigned HOST_WIDE_INT count
= 0;
12237 if (GET_CODE (align_exp
) == CONST_INT
)
12238 align
= INTVAL (align_exp
);
12240 /* Can't use any of this if the user has appropriated esi or edi. */
12241 if (global_regs
[4] || global_regs
[5])
12244 /* This simple hack avoids all inlining code and simplifies code below. */
12245 if (!TARGET_ALIGN_STRINGOPS
)
12248 if (GET_CODE (count_exp
) == CONST_INT
)
12250 count
= INTVAL (count_exp
);
12251 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12255 /* Figure out proper mode for counter. For 32bits it is always SImode,
12256 for 64bits use SImode when possible, otherwise DImode.
12257 Set count to number of bytes copied when known at compile time. */
12259 || GET_MODE (count_exp
) == SImode
12260 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12261 counter_mode
= SImode
;
12263 counter_mode
= DImode
;
12265 gcc_assert (counter_mode
== SImode
|| counter_mode
== DImode
);
12267 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12268 if (destreg
!= XEXP (dst
, 0))
12269 dst
= replace_equiv_address_nv (dst
, destreg
);
12270 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
12271 if (srcreg
!= XEXP (src
, 0))
12272 src
= replace_equiv_address_nv (src
, srcreg
);
  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
     known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */
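
  /* Worked example of the size estimate above (illustrative): for count == 11
     the inline sequence is 2 x movsl + movsw + movsb, i.e.
     11/4 + (11 & 3) = 5 bytes, which beats the 7-byte
     movl $11, %ecx; rep; movsb sequence, so the inline form is preferred when
     optimizing for size.  */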
12283 if ((!optimize
|| optimize_size
)
12288 || (count
& 3) + count
/ 4 > 6))))
12290 emit_insn (gen_cld ());
12291 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12292 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12293 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
12294 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
12298 /* For constant aligned (or small unaligned) copies use rep movsl
12299 followed by code copying the rest. For PentiumPro ensure 8 byte
12300 alignment to allow rep movsl acceleration. */
12302 else if (count
!= 0
12304 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12305 || optimize_size
|| count
< (unsigned int) 64))
12307 unsigned HOST_WIDE_INT offset
= 0;
12308 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12309 rtx srcmem
, dstmem
;
12311 emit_insn (gen_cld ());
12312 if (count
& ~(size
- 1))
12314 if ((TARGET_SINGLE_STRINGOP
|| optimize_size
) && count
< 5 * 4)
12316 enum machine_mode movs_mode
= size
== 4 ? SImode
: DImode
;
12318 while (offset
< (count
& ~(size
- 1)))
12320 srcmem
= adjust_automodify_address_nv (src
, movs_mode
,
12322 dstmem
= adjust_automodify_address_nv (dst
, movs_mode
,
12324 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12330 countreg
= GEN_INT ((count
>> (size
== 4 ? 2 : 3))
12331 & (TARGET_64BIT
? -1 : 0x3fffffff));
12332 countreg
= copy_to_mode_reg (counter_mode
, countreg
);
12333 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12335 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12336 GEN_INT (size
== 4 ? 2 : 3));
12337 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12338 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12340 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12341 countreg
, destexp
, srcexp
));
12342 offset
= count
& ~(size
- 1);
12345 if (size
== 8 && (count
& 0x04))
12347 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
12349 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12351 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12356 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
12358 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12360 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12365 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
12367 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12369 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12372 /* The generic code based on the glibc implementation:
12373 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12374 allowing accelerated copying there)
12375 - copy the data using rep movsl
12376 - copy the rest. */
12381 rtx srcmem
, dstmem
;
12382 int desired_alignment
= (TARGET_PENTIUMPRO
12383 && (count
== 0 || count
>= (unsigned int) 260)
12384 ? 8 : UNITS_PER_WORD
);
12385 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12386 dst
= change_address (dst
, BLKmode
, destreg
);
12387 src
= change_address (src
, BLKmode
, srcreg
);
12389 /* In case we don't know anything about the alignment, default to
12390 library version, since it is usually equally fast and result in
12393 Also emit call when we know that the count is large and call overhead
12394 will not be important. */
12395 if (!TARGET_INLINE_ALL_STRINGOPS
12396 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12399 if (TARGET_SINGLE_STRINGOP
)
12400 emit_insn (gen_cld ());
12402 countreg2
= gen_reg_rtx (Pmode
);
12403 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12405 /* We don't use loops to align destination and to copy parts smaller
12406 than 4 bytes, because gcc is able to optimize such code better (in
12407 the case the destination or the count really is aligned, gcc is often
12408 able to predict the branches) and also it is friendlier to the
12409 hardware branch prediction.
12411 Using loops is beneficial for generic case, because we can
12412 handle small counts using the loops. Many CPUs (such as Athlon)
12413 have large REP prefix setup costs.
12415 This is quite costly. Maybe we can revisit this decision later or
12416 add some customizability to this code. */
12418 if (count
== 0 && align
< desired_alignment
)
12420 label
= gen_label_rtx ();
12421 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12422 LEU
, 0, counter_mode
, 1, label
);
12426 rtx label
= ix86_expand_aligntest (destreg
, 1);
12427 srcmem
= change_address (src
, QImode
, srcreg
);
12428 dstmem
= change_address (dst
, QImode
, destreg
);
12429 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12430 ix86_adjust_counter (countreg
, 1);
12431 emit_label (label
);
12432 LABEL_NUSES (label
) = 1;
12436 rtx label
= ix86_expand_aligntest (destreg
, 2);
12437 srcmem
= change_address (src
, HImode
, srcreg
);
12438 dstmem
= change_address (dst
, HImode
, destreg
);
12439 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12440 ix86_adjust_counter (countreg
, 2);
12441 emit_label (label
);
12442 LABEL_NUSES (label
) = 1;
12444 if (align
<= 4 && desired_alignment
> 4)
12446 rtx label
= ix86_expand_aligntest (destreg
, 4);
12447 srcmem
= change_address (src
, SImode
, srcreg
);
12448 dstmem
= change_address (dst
, SImode
, destreg
);
12449 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12450 ix86_adjust_counter (countreg
, 4);
12451 emit_label (label
);
12452 LABEL_NUSES (label
) = 1;
12455 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12457 emit_label (label
);
12458 LABEL_NUSES (label
) = 1;
12461 if (!TARGET_SINGLE_STRINGOP
)
12462 emit_insn (gen_cld ());
12465 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
12467 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
12471 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
12472 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
12474 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12475 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12476 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12477 countreg2
, destexp
, srcexp
));
12481 emit_label (label
);
12482 LABEL_NUSES (label
) = 1;
12484 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
12486 srcmem
= change_address (src
, SImode
, srcreg
);
12487 dstmem
= change_address (dst
, SImode
, destreg
);
12488 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12490 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
12492 rtx label
= ix86_expand_aligntest (countreg
, 4);
12493 srcmem
= change_address (src
, SImode
, srcreg
);
12494 dstmem
= change_address (dst
, SImode
, destreg
);
12495 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12496 emit_label (label
);
12497 LABEL_NUSES (label
) = 1;
12499 if (align
> 2 && count
!= 0 && (count
& 2))
12501 srcmem
= change_address (src
, HImode
, srcreg
);
12502 dstmem
= change_address (dst
, HImode
, destreg
);
12503 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12505 if (align
<= 2 || count
== 0)
12507 rtx label
= ix86_expand_aligntest (countreg
, 2);
12508 srcmem
= change_address (src
, HImode
, srcreg
);
12509 dstmem
= change_address (dst
, HImode
, destreg
);
12510 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12511 emit_label (label
);
12512 LABEL_NUSES (label
) = 1;
12514 if (align
> 1 && count
!= 0 && (count
& 1))
12516 srcmem
= change_address (src
, QImode
, srcreg
);
12517 dstmem
= change_address (dst
, QImode
, destreg
);
12518 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12520 if (align
<= 1 || count
== 0)
12522 rtx label
= ix86_expand_aligntest (countreg
, 1);
12523 srcmem
= change_address (src
, QImode
, srcreg
);
12524 dstmem
= change_address (dst
, QImode
, destreg
);
12525 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12526 emit_label (label
);
12527 LABEL_NUSES (label
) = 1;
12534 /* Expand string clear operation (bzero). Use i386 string operations when
12535 profitable. expand_movmem contains similar code. */
12537 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
12539 rtx destreg
, zeroreg
, countreg
, destexp
;
12540 enum machine_mode counter_mode
;
12541 HOST_WIDE_INT align
= 0;
12542 unsigned HOST_WIDE_INT count
= 0;
12544 if (GET_CODE (align_exp
) == CONST_INT
)
12545 align
= INTVAL (align_exp
);
12547 /* Can't use any of this if the user has appropriated esi. */
12548 if (global_regs
[4])
12551 /* This simple hack avoids all inlining code and simplifies code below. */
12552 if (!TARGET_ALIGN_STRINGOPS
)
12555 if (GET_CODE (count_exp
) == CONST_INT
)
12557 count
= INTVAL (count_exp
);
12558 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12561 /* Figure out proper mode for counter. For 32bits it is always SImode,
12562 for 64bits use SImode when possible, otherwise DImode.
12563 Set count to number of bytes copied when known at compile time. */
12565 || GET_MODE (count_exp
) == SImode
12566 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12567 counter_mode
= SImode
;
12569 counter_mode
= DImode
;
12571 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12572 if (destreg
!= XEXP (dst
, 0))
12573 dst
= replace_equiv_address_nv (dst
, destreg
);
12576 /* When optimizing for size emit simple rep ; movsb instruction for
12577 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12578 sequence is 7 bytes long, so if optimizing for size and count is
12579 small enough that some stosl, stosw and stosb instructions without
12580 rep are shorter, fall back into the next if. */
12582 if ((!optimize
|| optimize_size
)
12585 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
12587 emit_insn (gen_cld ());
12589 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12590 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
12591 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12592 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
12594 else if (count
!= 0
12596 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12597 || optimize_size
|| count
< (unsigned int) 64))
12599 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12600 unsigned HOST_WIDE_INT offset
= 0;
12602 emit_insn (gen_cld ());
12604 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
12605 if (count
& ~(size
- 1))
12607 unsigned HOST_WIDE_INT repcount
;
12608 unsigned int max_nonrep
;
12610 repcount
= count
>> (size
== 4 ? 2 : 3);
12612 repcount
&= 0x3fffffff;
12614 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12615 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12616 bytes. In both cases the latter seems to be faster for small
12618 max_nonrep
= size
== 4 ? 7 : 4;
12619 if (!optimize_size
)
12622 case PROCESSOR_PENTIUM4
:
12623 case PROCESSOR_NOCONA
:
12630 if (repcount
<= max_nonrep
)
12631 while (repcount
-- > 0)
12633 rtx mem
= adjust_automodify_address_nv (dst
,
12634 GET_MODE (zeroreg
),
12636 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
12641 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
12642 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12643 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12644 GEN_INT (size
== 4 ? 2 : 3));
12645 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12646 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
12648 offset
= count
& ~(size
- 1);
12651 if (size
== 8 && (count
& 0x04))
12653 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12655 emit_insn (gen_strset (destreg
, mem
,
12656 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
12661 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12663 emit_insn (gen_strset (destreg
, mem
,
12664 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12669 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12671 emit_insn (gen_strset (destreg
, mem
,
12672 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12679 /* Compute desired alignment of the string operation. */
12680 int desired_alignment
= (TARGET_PENTIUMPRO
12681 && (count
== 0 || count
>= (unsigned int) 260)
12682 ? 8 : UNITS_PER_WORD
);
12684 /* In case we don't know anything about the alignment, default to
12685 library version, since it is usually equally fast and result in
12688 Also emit call when we know that the count is large and call overhead
12689 will not be important. */
12690 if (!TARGET_INLINE_ALL_STRINGOPS
12691 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12694 if (TARGET_SINGLE_STRINGOP
)
12695 emit_insn (gen_cld ());
12697 countreg2
= gen_reg_rtx (Pmode
);
12698 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12699 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
12700 /* Get rid of MEM_OFFSET, it won't be accurate. */
12701 dst
= change_address (dst
, BLKmode
, destreg
);
12703 if (count
== 0 && align
< desired_alignment
)
12705 label
= gen_label_rtx ();
12706 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12707 LEU
, 0, counter_mode
, 1, label
);
12711 rtx label
= ix86_expand_aligntest (destreg
, 1);
12712 emit_insn (gen_strset (destreg
, dst
,
12713 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12714 ix86_adjust_counter (countreg
, 1);
12715 emit_label (label
);
12716 LABEL_NUSES (label
) = 1;
12720 rtx label
= ix86_expand_aligntest (destreg
, 2);
12721 emit_insn (gen_strset (destreg
, dst
,
12722 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12723 ix86_adjust_counter (countreg
, 2);
12724 emit_label (label
);
12725 LABEL_NUSES (label
) = 1;
12727 if (align
<= 4 && desired_alignment
> 4)
12729 rtx label
= ix86_expand_aligntest (destreg
, 4);
12730 emit_insn (gen_strset (destreg
, dst
,
12732 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
12734 ix86_adjust_counter (countreg
, 4);
12735 emit_label (label
);
12736 LABEL_NUSES (label
) = 1;
12739 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12741 emit_label (label
);
12742 LABEL_NUSES (label
) = 1;
12746 if (!TARGET_SINGLE_STRINGOP
)
12747 emit_insn (gen_cld ());
12750 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
12752 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
12756 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
12757 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
12759 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12760 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
12764 emit_label (label
);
12765 LABEL_NUSES (label
) = 1;
12768 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
12769 emit_insn (gen_strset (destreg
, dst
,
12770 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
12771 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
12773 rtx label
= ix86_expand_aligntest (countreg
, 4);
12774 emit_insn (gen_strset (destreg
, dst
,
12775 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
12776 emit_label (label
);
12777 LABEL_NUSES (label
) = 1;
12779 if (align
> 2 && count
!= 0 && (count
& 2))
12780 emit_insn (gen_strset (destreg
, dst
,
12781 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12782 if (align
<= 2 || count
== 0)
12784 rtx label
= ix86_expand_aligntest (countreg
, 2);
12785 emit_insn (gen_strset (destreg
, dst
,
12786 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12787 emit_label (label
);
12788 LABEL_NUSES (label
) = 1;
12790 if (align
> 1 && count
!= 0 && (count
& 1))
12791 emit_insn (gen_strset (destreg
, dst
,
12792 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12793 if (align
<= 1 || count
== 0)
12795 rtx label
= ix86_expand_aligntest (countreg
, 1);
12796 emit_insn (gen_strset (destreg
, dst
,
12797 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12798 emit_label (label
);
12799 LABEL_NUSES (label
) = 1;
/* Expand strlen.  */

ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
        emit_insn (gen_subdi3 (out, out, addr));
        emit_insn (gen_subsi3 (out, out, addr));

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12877 /* Expand the appropriate insns for doing strlen if not just doing
12880 out = result, initialized with the start address
12881 align_rtx = alignment of the address.
12882 scratch = scratch register, initialized with the startaddress when
12883 not aligned, otherwise undefined
12885 This is just the body. It needs the initializations mentioned above and
12886 some address computing at the end. These things are done in i386.md. */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether we are already aligned to a 4-byte boundary.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop: it only makes the program larger and does not help
     speed.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2, out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
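/* What the expander above emits is the classic word-at-a-time strlen:
   check up to three leading bytes until the pointer is 4-byte aligned,
   then read 32 bits per iteration and test them with
       (x - 0x01010101) & ~x & 0x80808080
   which is nonzero iff some byte of x is zero.  A hedged C sketch of the
   same algorithm (illustrative only; like the RTL version it relies on a
   little-endian target where the aligned 4-byte over-read is harmless):  */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static size_t
strlen_unrolled (const char *s)
{
  const char *p = s;
  uint32_t x;

  /* Check 1..3 bytes until p is 4-byte aligned.  */
  while (((uintptr_t) p & 3) != 0)
    {
      if (*p == '\0')
        return (size_t) (p - s);
      p++;
    }

  /* Scan 4 bytes at a time; the formula flags any zero byte.  */
  for (;;)
    {
      memcpy (&x, p, 4);                        /* aligned 32-bit load */
      if (((x - 0x01010101u) & ~x & 0x80808080u) != 0)
        break;
      p += 4;
    }

  /* Fix up: locate which of the four bytes was the zero.  */
  while (*p != '\0')
    p++;
  return (size_t) (p - s);
}
#endif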
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2 ATTRIBUTE_UNUSED,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}
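/* The function above is simply a memo table: slots are keyed by (mode, n),
   looked up in a linked list, and allocated only on a miss so later requests
   for the same slot reuse the same stack location.  A minimal standalone
   sketch of the same caching pattern (hypothetical types and allocator, not
   GCC's):  */
#if 0
#include <stdlib.h>

struct slot_entry
{
  int mode, n;
  void *slot;
  struct slot_entry *next;
};

static struct slot_entry *slots;

static void *
get_slot (int mode, int n, void *(*alloc) (int))
{
  struct slot_entry *s;

  for (s = slots; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->slot;              /* cache hit: reuse the existing slot */

  s = malloc (sizeof *s);          /* cache miss: allocate and remember */
  s->mode = mode;
  s->n = n;
  s->slot = alloc (mode);
  s->next = slots;
  slots = s;
  return s->slot;
}
#endif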
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_ANY_GNU_TLS
                                             && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                                        "_TLS_MODULE_BASE_");
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
        len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp, which always wants an index.  */
          || base == stack_pointer_rtx
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx)
        len += 1;
    }

  return len;
}
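/* The rules the function above applies mirror the x86 addressing encoding:
   a displacement is one byte if it fits in a signed 8-bit immediate and four
   bytes otherwise, %esp as a base always forces a SIB byte, and %ebp as a
   base always forces a displacement.  A hedged, simplified standalone model
   of those rules (the helper name, register numbering, and the treatment of
   64-bit/extended registers are illustrative, not GCC's API):  */
#if 0
#include <limits.h>

/* Return the number of bytes needed after the opcode and ModRM byte for a
   base/index/disp address: SIB byte plus displacement.  Hardware register
   numbers: ESP = 4, EBP = 5.  base < 0 means "no base" (absolute).  */
static int
addr_extra_bytes (int base, int has_index, long disp)
{
  int len = 0;
  int has_disp = (disp != 0) || (base == 5);   /* %ebp always needs a disp */

  if (has_index || base == 4)                  /* %esp always needs a SIB  */
    len += 1;
  if (base < 0)                                /* absolute: disp32 only    */
    return len + 4;
  if (has_disp)
    len += (disp >= SCHAR_MIN && disp <= SCHAR_MAX) ? 1 : 4;
  return len;
}
#endif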
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          switch (get_attr_mode (insn))
            {
            case MODE_QI:
              len = 1;
              break;
            case MODE_HI:
              len = 2;
              break;
            case MODE_SI:
              len = 4;
              break;
            /* Immediates for DImode instructions are encoded as 32-bit
               sign-extended values.  */
            case MODE_DI:
              len = 4;
              break;
            default:
              fatal_insn ("unknown insn mode", insn);
            }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */

int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));

  return 0;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);

      if (GET_CODE (addr) == PARALLEL)
        addr = XVECEXP (addr, 0, 0);

      gcc_assert (GET_CODE (addr) == SET);

      addr = SET_SRC (addr);
    }
  else
    {
      int i;

      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* The reorder buffer can hide the latency of a load by executing it
         in parallel with the previous instruction, provided that
         instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* The reorder buffer can hide the latency of a load by executing it
         in parallel with the previous instruction, provided that
         instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* The reorder buffer can hide the latency of a load by executing it
         in parallel with the previous instruction, provided that
         instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    default:
      break;
    }

  return cost;
}
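/* The core idea of the hook above: anti and output dependencies are free,
   and the out-of-order core can hide a load's latency behind the producing
   instruction unless that producer feeds the address calculation.  A tiny
   hedged model of that adjustment (the names and the DEP_* enumeration are
   illustrative, not GCC's):  */
#if 0
enum dep_kind { DEP_TRUE, DEP_ANTI, DEP_OUTPUT };

static int
adjust_cost_model (enum dep_kind kind, int cost,
                   int consumer_loads_memory,
                   int producer_feeds_address,
                   int loadcost)
{
  if (kind != DEP_TRUE)
    return 0;                       /* anti/output deps cost nothing */
  if (consumer_loads_memory && !producer_feeds_address)
    return cost > loadcost ? cost - loadcost : 0;  /* load latency hidden */
  return cost;                      /* true dependence on the address */
}
#endif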
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  return 0;
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : 256;

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
          || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
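/* A hedged worked example of the rules above, remembering that TYPE_SIZE is
   measured in bits (the declarations below are illustrative only):  */
#if 0
static char   big[40];   /* 320 bits >= max_align (256): a static definition
                            is raised to 256-bit (32-byte) alignment unless
                            optimizing for size.  */
static double pair[2];   /* 128 bits: on x86-64 the ABI rule raises this
                            aggregate to 128-bit (16-byte) alignment.  */
static double one;       /* DFmode scalar: raised to 64-bit alignment.  */
#endif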
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;

      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));
      offset += 3;

      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
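/* For the 32-bit case above, the trampoline is the 10-byte sequence
   "b9 <imm32> e9 <rel32>", i.e. "movl $cxt, %ecx; jmp fnaddr", with the
   jump displacement measured from the end of the sequence.  A hedged C
   sketch that writes the same bytes at run time (illustrative only, not
   part of GCC):  */
#if 0
#include <stdint.h>
#include <string.h>

static void
write_tramp32 (unsigned char *tramp, uint32_t fnaddr, uint32_t cxt)
{
  /* rel32 is relative to the byte after the 10-byte trampoline.  */
  uint32_t disp = fnaddr - ((uint32_t) (uintptr_t) tramp + 10);

  tramp[0] = 0xb9;                 /* movl $imm32, %ecx */
  memcpy (tramp + 1, &cxt, 4);
  tramp[5] = 0xe9;                 /* jmp rel32 */
  memcpy (tramp + 6, &disp, 4);
}
#endif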
13828 /* Codes for all the SSE/MMX builtins. */
13831 IX86_BUILTIN_ADDPS
,
13832 IX86_BUILTIN_ADDSS
,
13833 IX86_BUILTIN_DIVPS
,
13834 IX86_BUILTIN_DIVSS
,
13835 IX86_BUILTIN_MULPS
,
13836 IX86_BUILTIN_MULSS
,
13837 IX86_BUILTIN_SUBPS
,
13838 IX86_BUILTIN_SUBSS
,
13840 IX86_BUILTIN_CMPEQPS
,
13841 IX86_BUILTIN_CMPLTPS
,
13842 IX86_BUILTIN_CMPLEPS
,
13843 IX86_BUILTIN_CMPGTPS
,
13844 IX86_BUILTIN_CMPGEPS
,
13845 IX86_BUILTIN_CMPNEQPS
,
13846 IX86_BUILTIN_CMPNLTPS
,
13847 IX86_BUILTIN_CMPNLEPS
,
13848 IX86_BUILTIN_CMPNGTPS
,
13849 IX86_BUILTIN_CMPNGEPS
,
13850 IX86_BUILTIN_CMPORDPS
,
13851 IX86_BUILTIN_CMPUNORDPS
,
13852 IX86_BUILTIN_CMPEQSS
,
13853 IX86_BUILTIN_CMPLTSS
,
13854 IX86_BUILTIN_CMPLESS
,
13855 IX86_BUILTIN_CMPNEQSS
,
13856 IX86_BUILTIN_CMPNLTSS
,
13857 IX86_BUILTIN_CMPNLESS
,
13858 IX86_BUILTIN_CMPNGTSS
,
13859 IX86_BUILTIN_CMPNGESS
,
13860 IX86_BUILTIN_CMPORDSS
,
13861 IX86_BUILTIN_CMPUNORDSS
,
13863 IX86_BUILTIN_COMIEQSS
,
13864 IX86_BUILTIN_COMILTSS
,
13865 IX86_BUILTIN_COMILESS
,
13866 IX86_BUILTIN_COMIGTSS
,
13867 IX86_BUILTIN_COMIGESS
,
13868 IX86_BUILTIN_COMINEQSS
,
13869 IX86_BUILTIN_UCOMIEQSS
,
13870 IX86_BUILTIN_UCOMILTSS
,
13871 IX86_BUILTIN_UCOMILESS
,
13872 IX86_BUILTIN_UCOMIGTSS
,
13873 IX86_BUILTIN_UCOMIGESS
,
13874 IX86_BUILTIN_UCOMINEQSS
,
13876 IX86_BUILTIN_CVTPI2PS
,
13877 IX86_BUILTIN_CVTPS2PI
,
13878 IX86_BUILTIN_CVTSI2SS
,
13879 IX86_BUILTIN_CVTSI642SS
,
13880 IX86_BUILTIN_CVTSS2SI
,
13881 IX86_BUILTIN_CVTSS2SI64
,
13882 IX86_BUILTIN_CVTTPS2PI
,
13883 IX86_BUILTIN_CVTTSS2SI
,
13884 IX86_BUILTIN_CVTTSS2SI64
,
13886 IX86_BUILTIN_MAXPS
,
13887 IX86_BUILTIN_MAXSS
,
13888 IX86_BUILTIN_MINPS
,
13889 IX86_BUILTIN_MINSS
,
13891 IX86_BUILTIN_LOADUPS
,
13892 IX86_BUILTIN_STOREUPS
,
13893 IX86_BUILTIN_MOVSS
,
13895 IX86_BUILTIN_MOVHLPS
,
13896 IX86_BUILTIN_MOVLHPS
,
13897 IX86_BUILTIN_LOADHPS
,
13898 IX86_BUILTIN_LOADLPS
,
13899 IX86_BUILTIN_STOREHPS
,
13900 IX86_BUILTIN_STORELPS
,
13902 IX86_BUILTIN_MASKMOVQ
,
13903 IX86_BUILTIN_MOVMSKPS
,
13904 IX86_BUILTIN_PMOVMSKB
,
13906 IX86_BUILTIN_MOVNTPS
,
13907 IX86_BUILTIN_MOVNTQ
,
13909 IX86_BUILTIN_LOADDQU
,
13910 IX86_BUILTIN_STOREDQU
,
13912 IX86_BUILTIN_PACKSSWB
,
13913 IX86_BUILTIN_PACKSSDW
,
13914 IX86_BUILTIN_PACKUSWB
,
13916 IX86_BUILTIN_PADDB
,
13917 IX86_BUILTIN_PADDW
,
13918 IX86_BUILTIN_PADDD
,
13919 IX86_BUILTIN_PADDQ
,
13920 IX86_BUILTIN_PADDSB
,
13921 IX86_BUILTIN_PADDSW
,
13922 IX86_BUILTIN_PADDUSB
,
13923 IX86_BUILTIN_PADDUSW
,
13924 IX86_BUILTIN_PSUBB
,
13925 IX86_BUILTIN_PSUBW
,
13926 IX86_BUILTIN_PSUBD
,
13927 IX86_BUILTIN_PSUBQ
,
13928 IX86_BUILTIN_PSUBSB
,
13929 IX86_BUILTIN_PSUBSW
,
13930 IX86_BUILTIN_PSUBUSB
,
13931 IX86_BUILTIN_PSUBUSW
,
13934 IX86_BUILTIN_PANDN
,
13938 IX86_BUILTIN_PAVGB
,
13939 IX86_BUILTIN_PAVGW
,
13941 IX86_BUILTIN_PCMPEQB
,
13942 IX86_BUILTIN_PCMPEQW
,
13943 IX86_BUILTIN_PCMPEQD
,
13944 IX86_BUILTIN_PCMPGTB
,
13945 IX86_BUILTIN_PCMPGTW
,
13946 IX86_BUILTIN_PCMPGTD
,
13948 IX86_BUILTIN_PMADDWD
,
13950 IX86_BUILTIN_PMAXSW
,
13951 IX86_BUILTIN_PMAXUB
,
13952 IX86_BUILTIN_PMINSW
,
13953 IX86_BUILTIN_PMINUB
,
13955 IX86_BUILTIN_PMULHUW
,
13956 IX86_BUILTIN_PMULHW
,
13957 IX86_BUILTIN_PMULLW
,
13959 IX86_BUILTIN_PSADBW
,
13960 IX86_BUILTIN_PSHUFW
,
13962 IX86_BUILTIN_PSLLW
,
13963 IX86_BUILTIN_PSLLD
,
13964 IX86_BUILTIN_PSLLQ
,
13965 IX86_BUILTIN_PSRAW
,
13966 IX86_BUILTIN_PSRAD
,
13967 IX86_BUILTIN_PSRLW
,
13968 IX86_BUILTIN_PSRLD
,
13969 IX86_BUILTIN_PSRLQ
,
13970 IX86_BUILTIN_PSLLWI
,
13971 IX86_BUILTIN_PSLLDI
,
13972 IX86_BUILTIN_PSLLQI
,
13973 IX86_BUILTIN_PSRAWI
,
13974 IX86_BUILTIN_PSRADI
,
13975 IX86_BUILTIN_PSRLWI
,
13976 IX86_BUILTIN_PSRLDI
,
13977 IX86_BUILTIN_PSRLQI
,
13979 IX86_BUILTIN_PUNPCKHBW
,
13980 IX86_BUILTIN_PUNPCKHWD
,
13981 IX86_BUILTIN_PUNPCKHDQ
,
13982 IX86_BUILTIN_PUNPCKLBW
,
13983 IX86_BUILTIN_PUNPCKLWD
,
13984 IX86_BUILTIN_PUNPCKLDQ
,
13986 IX86_BUILTIN_SHUFPS
,
13988 IX86_BUILTIN_RCPPS
,
13989 IX86_BUILTIN_RCPSS
,
13990 IX86_BUILTIN_RSQRTPS
,
13991 IX86_BUILTIN_RSQRTSS
,
13992 IX86_BUILTIN_SQRTPS
,
13993 IX86_BUILTIN_SQRTSS
,
13995 IX86_BUILTIN_UNPCKHPS
,
13996 IX86_BUILTIN_UNPCKLPS
,
13998 IX86_BUILTIN_ANDPS
,
13999 IX86_BUILTIN_ANDNPS
,
14001 IX86_BUILTIN_XORPS
,
14004 IX86_BUILTIN_LDMXCSR
,
14005 IX86_BUILTIN_STMXCSR
,
14006 IX86_BUILTIN_SFENCE
,
14008 /* 3DNow! Original */
14009 IX86_BUILTIN_FEMMS
,
14010 IX86_BUILTIN_PAVGUSB
,
14011 IX86_BUILTIN_PF2ID
,
14012 IX86_BUILTIN_PFACC
,
14013 IX86_BUILTIN_PFADD
,
14014 IX86_BUILTIN_PFCMPEQ
,
14015 IX86_BUILTIN_PFCMPGE
,
14016 IX86_BUILTIN_PFCMPGT
,
14017 IX86_BUILTIN_PFMAX
,
14018 IX86_BUILTIN_PFMIN
,
14019 IX86_BUILTIN_PFMUL
,
14020 IX86_BUILTIN_PFRCP
,
14021 IX86_BUILTIN_PFRCPIT1
,
14022 IX86_BUILTIN_PFRCPIT2
,
14023 IX86_BUILTIN_PFRSQIT1
,
14024 IX86_BUILTIN_PFRSQRT
,
14025 IX86_BUILTIN_PFSUB
,
14026 IX86_BUILTIN_PFSUBR
,
14027 IX86_BUILTIN_PI2FD
,
14028 IX86_BUILTIN_PMULHRW
,
14030 /* 3DNow! Athlon Extensions */
14031 IX86_BUILTIN_PF2IW
,
14032 IX86_BUILTIN_PFNACC
,
14033 IX86_BUILTIN_PFPNACC
,
14034 IX86_BUILTIN_PI2FW
,
14035 IX86_BUILTIN_PSWAPDSI
,
14036 IX86_BUILTIN_PSWAPDSF
,
14039 IX86_BUILTIN_ADDPD
,
14040 IX86_BUILTIN_ADDSD
,
14041 IX86_BUILTIN_DIVPD
,
14042 IX86_BUILTIN_DIVSD
,
14043 IX86_BUILTIN_MULPD
,
14044 IX86_BUILTIN_MULSD
,
14045 IX86_BUILTIN_SUBPD
,
14046 IX86_BUILTIN_SUBSD
,
14048 IX86_BUILTIN_CMPEQPD
,
14049 IX86_BUILTIN_CMPLTPD
,
14050 IX86_BUILTIN_CMPLEPD
,
14051 IX86_BUILTIN_CMPGTPD
,
14052 IX86_BUILTIN_CMPGEPD
,
14053 IX86_BUILTIN_CMPNEQPD
,
14054 IX86_BUILTIN_CMPNLTPD
,
14055 IX86_BUILTIN_CMPNLEPD
,
14056 IX86_BUILTIN_CMPNGTPD
,
14057 IX86_BUILTIN_CMPNGEPD
,
14058 IX86_BUILTIN_CMPORDPD
,
14059 IX86_BUILTIN_CMPUNORDPD
,
14060 IX86_BUILTIN_CMPNEPD
,
14061 IX86_BUILTIN_CMPEQSD
,
14062 IX86_BUILTIN_CMPLTSD
,
14063 IX86_BUILTIN_CMPLESD
,
14064 IX86_BUILTIN_CMPNEQSD
,
14065 IX86_BUILTIN_CMPNLTSD
,
14066 IX86_BUILTIN_CMPNLESD
,
14067 IX86_BUILTIN_CMPORDSD
,
14068 IX86_BUILTIN_CMPUNORDSD
,
14069 IX86_BUILTIN_CMPNESD
,
14071 IX86_BUILTIN_COMIEQSD
,
14072 IX86_BUILTIN_COMILTSD
,
14073 IX86_BUILTIN_COMILESD
,
14074 IX86_BUILTIN_COMIGTSD
,
14075 IX86_BUILTIN_COMIGESD
,
14076 IX86_BUILTIN_COMINEQSD
,
14077 IX86_BUILTIN_UCOMIEQSD
,
14078 IX86_BUILTIN_UCOMILTSD
,
14079 IX86_BUILTIN_UCOMILESD
,
14080 IX86_BUILTIN_UCOMIGTSD
,
14081 IX86_BUILTIN_UCOMIGESD
,
14082 IX86_BUILTIN_UCOMINEQSD
,
14084 IX86_BUILTIN_MAXPD
,
14085 IX86_BUILTIN_MAXSD
,
14086 IX86_BUILTIN_MINPD
,
14087 IX86_BUILTIN_MINSD
,
14089 IX86_BUILTIN_ANDPD
,
14090 IX86_BUILTIN_ANDNPD
,
14092 IX86_BUILTIN_XORPD
,
14094 IX86_BUILTIN_SQRTPD
,
14095 IX86_BUILTIN_SQRTSD
,
14097 IX86_BUILTIN_UNPCKHPD
,
14098 IX86_BUILTIN_UNPCKLPD
,
14100 IX86_BUILTIN_SHUFPD
,
14102 IX86_BUILTIN_LOADUPD
,
14103 IX86_BUILTIN_STOREUPD
,
14104 IX86_BUILTIN_MOVSD
,
14106 IX86_BUILTIN_LOADHPD
,
14107 IX86_BUILTIN_LOADLPD
,
14109 IX86_BUILTIN_CVTDQ2PD
,
14110 IX86_BUILTIN_CVTDQ2PS
,
14112 IX86_BUILTIN_CVTPD2DQ
,
14113 IX86_BUILTIN_CVTPD2PI
,
14114 IX86_BUILTIN_CVTPD2PS
,
14115 IX86_BUILTIN_CVTTPD2DQ
,
14116 IX86_BUILTIN_CVTTPD2PI
,
14118 IX86_BUILTIN_CVTPI2PD
,
14119 IX86_BUILTIN_CVTSI2SD
,
14120 IX86_BUILTIN_CVTSI642SD
,
14122 IX86_BUILTIN_CVTSD2SI
,
14123 IX86_BUILTIN_CVTSD2SI64
,
14124 IX86_BUILTIN_CVTSD2SS
,
14125 IX86_BUILTIN_CVTSS2SD
,
14126 IX86_BUILTIN_CVTTSD2SI
,
14127 IX86_BUILTIN_CVTTSD2SI64
,
14129 IX86_BUILTIN_CVTPS2DQ
,
14130 IX86_BUILTIN_CVTPS2PD
,
14131 IX86_BUILTIN_CVTTPS2DQ
,
14133 IX86_BUILTIN_MOVNTI
,
14134 IX86_BUILTIN_MOVNTPD
,
14135 IX86_BUILTIN_MOVNTDQ
,
14138 IX86_BUILTIN_MASKMOVDQU
,
14139 IX86_BUILTIN_MOVMSKPD
,
14140 IX86_BUILTIN_PMOVMSKB128
,
14142 IX86_BUILTIN_PACKSSWB128
,
14143 IX86_BUILTIN_PACKSSDW128
,
14144 IX86_BUILTIN_PACKUSWB128
,
14146 IX86_BUILTIN_PADDB128
,
14147 IX86_BUILTIN_PADDW128
,
14148 IX86_BUILTIN_PADDD128
,
14149 IX86_BUILTIN_PADDQ128
,
14150 IX86_BUILTIN_PADDSB128
,
14151 IX86_BUILTIN_PADDSW128
,
14152 IX86_BUILTIN_PADDUSB128
,
14153 IX86_BUILTIN_PADDUSW128
,
14154 IX86_BUILTIN_PSUBB128
,
14155 IX86_BUILTIN_PSUBW128
,
14156 IX86_BUILTIN_PSUBD128
,
14157 IX86_BUILTIN_PSUBQ128
,
14158 IX86_BUILTIN_PSUBSB128
,
14159 IX86_BUILTIN_PSUBSW128
,
14160 IX86_BUILTIN_PSUBUSB128
,
14161 IX86_BUILTIN_PSUBUSW128
,
14163 IX86_BUILTIN_PAND128
,
14164 IX86_BUILTIN_PANDN128
,
14165 IX86_BUILTIN_POR128
,
14166 IX86_BUILTIN_PXOR128
,
14168 IX86_BUILTIN_PAVGB128
,
14169 IX86_BUILTIN_PAVGW128
,
14171 IX86_BUILTIN_PCMPEQB128
,
14172 IX86_BUILTIN_PCMPEQW128
,
14173 IX86_BUILTIN_PCMPEQD128
,
14174 IX86_BUILTIN_PCMPGTB128
,
14175 IX86_BUILTIN_PCMPGTW128
,
14176 IX86_BUILTIN_PCMPGTD128
,
14178 IX86_BUILTIN_PMADDWD128
,
14180 IX86_BUILTIN_PMAXSW128
,
14181 IX86_BUILTIN_PMAXUB128
,
14182 IX86_BUILTIN_PMINSW128
,
14183 IX86_BUILTIN_PMINUB128
,
14185 IX86_BUILTIN_PMULUDQ
,
14186 IX86_BUILTIN_PMULUDQ128
,
14187 IX86_BUILTIN_PMULHUW128
,
14188 IX86_BUILTIN_PMULHW128
,
14189 IX86_BUILTIN_PMULLW128
,
14191 IX86_BUILTIN_PSADBW128
,
14192 IX86_BUILTIN_PSHUFHW
,
14193 IX86_BUILTIN_PSHUFLW
,
14194 IX86_BUILTIN_PSHUFD
,
14196 IX86_BUILTIN_PSLLW128
,
14197 IX86_BUILTIN_PSLLD128
,
14198 IX86_BUILTIN_PSLLQ128
,
14199 IX86_BUILTIN_PSRAW128
,
14200 IX86_BUILTIN_PSRAD128
,
14201 IX86_BUILTIN_PSRLW128
,
14202 IX86_BUILTIN_PSRLD128
,
14203 IX86_BUILTIN_PSRLQ128
,
14204 IX86_BUILTIN_PSLLDQI128
,
14205 IX86_BUILTIN_PSLLWI128
,
14206 IX86_BUILTIN_PSLLDI128
,
14207 IX86_BUILTIN_PSLLQI128
,
14208 IX86_BUILTIN_PSRAWI128
,
14209 IX86_BUILTIN_PSRADI128
,
14210 IX86_BUILTIN_PSRLDQI128
,
14211 IX86_BUILTIN_PSRLWI128
,
14212 IX86_BUILTIN_PSRLDI128
,
14213 IX86_BUILTIN_PSRLQI128
,
14215 IX86_BUILTIN_PUNPCKHBW128
,
14216 IX86_BUILTIN_PUNPCKHWD128
,
14217 IX86_BUILTIN_PUNPCKHDQ128
,
14218 IX86_BUILTIN_PUNPCKHQDQ128
,
14219 IX86_BUILTIN_PUNPCKLBW128
,
14220 IX86_BUILTIN_PUNPCKLWD128
,
14221 IX86_BUILTIN_PUNPCKLDQ128
,
14222 IX86_BUILTIN_PUNPCKLQDQ128
,
14224 IX86_BUILTIN_CLFLUSH
,
14225 IX86_BUILTIN_MFENCE
,
14226 IX86_BUILTIN_LFENCE
,
14228 /* Prescott New Instructions. */
14229 IX86_BUILTIN_ADDSUBPS
,
14230 IX86_BUILTIN_HADDPS
,
14231 IX86_BUILTIN_HSUBPS
,
14232 IX86_BUILTIN_MOVSHDUP
,
14233 IX86_BUILTIN_MOVSLDUP
,
14234 IX86_BUILTIN_ADDSUBPD
,
14235 IX86_BUILTIN_HADDPD
,
14236 IX86_BUILTIN_HSUBPD
,
14237 IX86_BUILTIN_LDDQU
,
14239 IX86_BUILTIN_MONITOR
,
14240 IX86_BUILTIN_MWAIT
,
14242 IX86_BUILTIN_VEC_INIT_V2SI
,
14243 IX86_BUILTIN_VEC_INIT_V4HI
,
14244 IX86_BUILTIN_VEC_INIT_V8QI
,
14245 IX86_BUILTIN_VEC_EXT_V2DF
,
14246 IX86_BUILTIN_VEC_EXT_V2DI
,
14247 IX86_BUILTIN_VEC_EXT_V4SF
,
14248 IX86_BUILTIN_VEC_EXT_V4SI
,
14249 IX86_BUILTIN_VEC_EXT_V8HI
,
14250 IX86_BUILTIN_VEC_EXT_V2SI
,
14251 IX86_BUILTIN_VEC_EXT_V4HI
,
14252 IX86_BUILTIN_VEC_SET_V8HI
,
14253 IX86_BUILTIN_VEC_SET_V4HI
,
14255 /* SSE2 ABI functions. */
14256 IX86_BUILTIN_SSE2_ACOS
,
14257 IX86_BUILTIN_SSE2_ACOSF
,
14258 IX86_BUILTIN_SSE2_ASIN
,
14259 IX86_BUILTIN_SSE2_ASINF
,
14260 IX86_BUILTIN_SSE2_ATAN
,
14261 IX86_BUILTIN_SSE2_ATANF
,
14262 IX86_BUILTIN_SSE2_ATAN2
,
14263 IX86_BUILTIN_SSE2_ATAN2F
,
14264 IX86_BUILTIN_SSE2_COS
,
14265 IX86_BUILTIN_SSE2_COSF
,
14266 IX86_BUILTIN_SSE2_EXP
,
14267 IX86_BUILTIN_SSE2_EXPF
,
14268 IX86_BUILTIN_SSE2_LOG10
,
14269 IX86_BUILTIN_SSE2_LOG10F
,
14270 IX86_BUILTIN_SSE2_LOG
,
14271 IX86_BUILTIN_SSE2_LOGF
,
14272 IX86_BUILTIN_SSE2_SIN
,
14273 IX86_BUILTIN_SSE2_SINF
,
14274 IX86_BUILTIN_SSE2_TAN
,
14275 IX86_BUILTIN_SSE2_TANF
,
#define def_builtin(MASK, NAME, TYPE, CODE)                             \
do {                                                                    \
  if ((MASK) & target_flags                                             \
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))                      \
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,   \
                                 NULL, NULL_TREE);                      \
} while (0)
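/* Typical use of the macro above, assuming a function-type tree node such
   as v4sf_ftype_v4sf_v4sf has been built earlier (the type name here is
   illustrative):  */
#if 0
def_builtin (MASK_SSE, "__builtin_ia32_addps",
             v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
/* The builtin is registered only when the MASK bits are in target_flags;
   MASK_64BIT entries are additionally gated on TARGET_64BIT.  */
#endif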
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively and must swap the
   operands in order to implement it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
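/* When BUILTIN_DESC_SWAP_OPERANDS is set, the expander exchanges the two
   operands so that, for example, a "greater than" builtin can reuse the
   native LT pattern (a > b becomes b < a).  A hedged sketch of how a
   consumer of these tables uses the flag (simplified; the real logic lives
   in the SSE comparison expanders later in this file):  */
#if 0
if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
  {
    rtx tmp = op0;      /* swap so the native comparison can be emitted */
    op0 = op1;
    op1 = tmp;
  }
/* e.g. __builtin_ia32_cmpgtps is listed below with comparison LT plus the
   swap flag.  */
#endif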
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
14332 static const struct builtin_description bdesc_2arg
[] =
14335 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
14336 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
14337 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
14338 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
14339 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
14340 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
14341 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
14342 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
14344 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
14345 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
14346 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
14347 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
14348 BUILTIN_DESC_SWAP_OPERANDS
},
14349 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
14350 BUILTIN_DESC_SWAP_OPERANDS
},
14351 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
14352 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
14353 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
14354 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
14355 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
14356 BUILTIN_DESC_SWAP_OPERANDS
},
14357 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
14358 BUILTIN_DESC_SWAP_OPERANDS
},
14359 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
14360 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
14361 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
14362 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
14363 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
14364 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
14365 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
14366 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
14367 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
14368 BUILTIN_DESC_SWAP_OPERANDS
},
14369 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
14370 BUILTIN_DESC_SWAP_OPERANDS
},
14371 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
14373 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
14374 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
14375 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
14376 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
14378 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
14379 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
14380 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
14381 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
14383 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
14384 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
14385 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
14386 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
14387 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
14390 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
14391 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
14392 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
14393 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
14394 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
14395 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
14396 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
14397 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
14399 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
14400 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
14401 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
14402 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
14403 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
14404 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
14405 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
14406 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
14408 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
14409 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
14410 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
14412 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
14413 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
14414 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
14415 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
14417 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
14418 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
14420 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
14421 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
14422 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
14423 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
14424 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
14425 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
14427 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
14428 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
14429 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
14430 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
14432 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
14433 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
14434 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
14435 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
14436 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
14437 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
14440 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
14441 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
14442 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
14444 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
14445 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
14446 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
14448 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
14449 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
14450 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
14451 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
14452 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
14453 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
14455 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
14456 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
14457 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
14458 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
14459 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
14460 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
14462 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
14463 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
14464 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
14465 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
14467 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
14468 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
14471 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
14472 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
14473 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
14474 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
14475 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
14476 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
14477 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
14478 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
14480 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
14481 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
14482 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
14483 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
14484 BUILTIN_DESC_SWAP_OPERANDS
},
14485 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
14486 BUILTIN_DESC_SWAP_OPERANDS
},
14487 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
14488 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
14489 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
14490 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
14491 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
14492 BUILTIN_DESC_SWAP_OPERANDS
},
14493 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
14494 BUILTIN_DESC_SWAP_OPERANDS
},
14495 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
14496 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
14497 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
14498 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
14499 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
14500 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
14501 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
14502 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
14503 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
14505 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
14506 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
14507 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
14508 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
14510 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
14511 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
14512 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
14513 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
14515 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
14516 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
14517 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
14520 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
14521 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
14522 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
14523 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
14524 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
14525 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
14526 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
14527 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
14529 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
14530 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
14531 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
14532 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
14533 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
14534 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
14535 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
14536 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};
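/* For reference, a minimal sketch (not part of this file) of how one of the
   two-operand builtins registered from bdesc_2arg above is reached from user
   code compiled with -msse2.  The __v8hi typedef mirrors the one provided by
   emmintrin.h and is an assumption of the example, not something defined
   here:

     typedef short __v8hi __attribute__ ((__vector_size__ (16)));

     __v8hi
     mullo_epi16 (__v8hi a, __v8hi b)
     {
       return __builtin_ia32_pmullw128 (a, b);
     }

   The call expands through ix86_expand_builtin into the named insn pattern
   recorded in the table entry (CODE_FOR_mulv8hi3 in this case).  */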
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
  if (TARGET_SSE)
    ix86_init_sse_abi_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description *d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
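  /* Worked example of the loop above (illustration only, not additional
     code): for the bdesc_2arg entry

       { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128",
	 IX86_BUILTIN_PMULLW128, 0, 0 }

     operand 1 of mulv8hi3 has mode V8HImode, so TYPE becomes
     v8hi_ftype_v8hi_v8hi and the call that is ultimately made is

       def_builtin (MASK_SSE2, "__builtin_ia32_pmullw128",
		    v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PMULLW128);  */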
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
/* Set up all the SSE ABI builtins that we may use to override
   the normal builtins.  */

static void
ix86_init_sse_abi_builtins (void)
{
  tree dbl, flt, dbl2, flt2;

  /* Bail out in case the template definitions are not available.  */
  if (! built_in_decls [BUILT_IN_SIN]
      || ! built_in_decls [BUILT_IN_SINF]
      || ! built_in_decls [BUILT_IN_ATAN2]
      || ! built_in_decls [BUILT_IN_ATAN2F])
    return;

  /* Build the function types as variants of the existing ones.  */
  dbl = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SIN]));
  TYPE_ATTRIBUTES (dbl)
    = tree_cons (get_identifier ("sseregparm"),
		 NULL_TREE, TYPE_ATTRIBUTES (dbl));
  flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
  TYPE_ATTRIBUTES (flt)
    = tree_cons (get_identifier ("sseregparm"),
		 NULL_TREE, TYPE_ATTRIBUTES (flt));
  dbl2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2]));
  TYPE_ATTRIBUTES (dbl2)
    = tree_cons (get_identifier ("sseregparm"),
		 NULL_TREE, TYPE_ATTRIBUTES (dbl2));
  flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
  TYPE_ATTRIBUTES (flt2)
    = tree_cons (get_identifier ("sseregparm"),
		 NULL_TREE, TYPE_ATTRIBUTES (flt2));

#define def_builtin(capname, name, type) \
  ix86_builtin_function_variants [BUILT_IN_ ## capname] \
    = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
				   IX86_BUILTIN_SSE2_ ## capname, \
				   BUILT_IN_MD, \
				   "__libm_sse2_" # name, NULL_TREE)

  def_builtin (ACOS, acos, dbl);
  def_builtin (ACOSF, acosf, flt);
  def_builtin (ASIN, asin, dbl);
  def_builtin (ASINF, asinf, flt);
  def_builtin (ATAN, atan, dbl);
  def_builtin (ATANF, atanf, flt);
  def_builtin (ATAN2, atan2, dbl2);
  def_builtin (ATAN2F, atan2f, flt2);
  def_builtin (COS, cos, dbl);
  def_builtin (COSF, cosf, flt);
  def_builtin (EXP, exp, dbl);
  def_builtin (EXPF, expf, flt);
  def_builtin (LOG10, log10, dbl);
  def_builtin (LOG10F, log10f, flt);
  def_builtin (LOG, log, dbl);
  def_builtin (LOGF, logf, flt);
  def_builtin (SIN, sin, dbl);
  def_builtin (SINF, sinf, flt);
  def_builtin (TAN, tan, dbl);
  def_builtin (TANF, tanf, flt);

#undef def_builtin
}
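/* For illustration, the def_builtin line for SIN above expands (roughly,
   under the BUILT_IN_MD class assumed in the reconstructed macro body) to:

     ix86_builtin_function_variants [BUILT_IN_SIN]
       = lang_hooks.builtin_function ("__builtin_sse2_sin", dbl,
				      IX86_BUILTIN_SSE2_SIN, BUILT_IN_MD,
				      "__libm_sse2_sin", NULL_TREE);

   i.e. each entry records an "sseregparm" variant of the libm builtin whose
   assembler name is the corresponding __libm_sse2_* routine.  */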
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
15538 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15541 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
15545 tree arg0
= TREE_VALUE (arglist
);
15546 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15547 rtx op0
= expand_normal (arg0
);
15548 rtx op1
= expand_normal (arg1
);
15550 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
15551 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
15552 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
15553 enum rtx_code comparison
= d
->comparison
;
15555 if (VECTOR_MODE_P (mode0
))
15556 op0
= safe_vector_operand (op0
, mode0
);
15557 if (VECTOR_MODE_P (mode1
))
15558 op1
= safe_vector_operand (op1
, mode1
);
15560 /* Swap operands if we have a comparison that isn't available in
15562 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
15564 rtx tmp
= gen_reg_rtx (mode1
);
15565 emit_move_insn (tmp
, op1
);
15570 if (optimize
|| !target
15571 || GET_MODE (target
) != tmode
15572 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
15573 target
= gen_reg_rtx (tmode
);
15575 if ((optimize
&& !register_operand (op0
, mode0
))
15576 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
15577 op0
= copy_to_mode_reg (mode0
, op0
);
15578 if ((optimize
&& !register_operand (op1
, mode1
))
15579 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
15580 op1
= copy_to_mode_reg (mode1
, op1
);
15582 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
15583 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
15590 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15593 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
15597 tree arg0
= TREE_VALUE (arglist
);
15598 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15599 rtx op0
= expand_normal (arg0
);
15600 rtx op1
= expand_normal (arg1
);
15602 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
15603 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
15604 enum rtx_code comparison
= d
->comparison
;
15606 if (VECTOR_MODE_P (mode0
))
15607 op0
= safe_vector_operand (op0
, mode0
);
15608 if (VECTOR_MODE_P (mode1
))
15609 op1
= safe_vector_operand (op1
, mode1
);
15611 /* Swap operands if we have a comparison that isn't available in
15613 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
15620 target
= gen_reg_rtx (SImode
);
15621 emit_move_insn (target
, const0_rtx
);
15622 target
= gen_rtx_SUBREG (QImode
, target
, 0);
15624 if ((optimize
&& !register_operand (op0
, mode0
))
15625 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
15626 op0
= copy_to_mode_reg (mode0
, op0
);
15627 if ((optimize
&& !register_operand (op1
, mode1
))
15628 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
15629 op1
= copy_to_mode_reg (mode1
, op1
);
15631 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
15632 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
15636 emit_insn (gen_rtx_SET (VOIDmode
,
15637 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
15638 gen_rtx_fmt_ee (comparison
, QImode
,
15642 return SUBREG_REG (target
);
15645 /* Return the integer constant in ARG. Constrain it to be in the range
15646 of the subparts of VEC_TYPE; issue an error if not. */
15649 get_element_number (tree vec_type
, tree arg
)
15651 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
15653 if (!host_integerp (arg
, 1)
15654 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
15656 error ("selector must be an integer constant in the range 0..%wi", max
);
15663 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15664 ix86_expand_vector_init. We DO have language-level syntax for this, in
15665 the form of (type){ init-list }. Except that since we can't place emms
15666 instructions from inside the compiler, we can't allow the use of MMX
15667 registers unless the user explicitly asks for it. So we do *not* define
15668 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15669 we have builtins invoked by mmintrin.h that gives us license to emit
15670 these sorts of instructions. */
15673 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
15675 enum machine_mode tmode
= TYPE_MODE (type
);
15676 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
15677 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
15678 rtvec v
= rtvec_alloc (n_elt
);
15680 gcc_assert (VECTOR_MODE_P (tmode
));
15682 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
15684 rtx x
= expand_normal (TREE_VALUE (arglist
));
15685 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
15688 gcc_assert (arglist
== NULL
);
15690 if (!target
|| !register_operand (target
, tmode
))
15691 target
= gen_reg_rtx (tmode
);
15693 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
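/* A minimal usage sketch (assumption: user code compiled with -mmmx; the
   __v2si typedef mirrors the one in mmintrin.h and is not defined here) of
   the vec_init wrapper builtin that the comment above describes:

     typedef int __v2si __attribute__ ((__vector_size__ (8)));

     __v2si
     make_v2si (int hi, int lo)
     {
       return __builtin_ia32_vec_init_v2si (hi, lo);
     }

   mmintrin.h reaches the same builtin underneath, which lets the compiler
   avoid emitting MMX code unless the user explicitly asked for it.  */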
15697 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15698 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15699 had a language-level syntax for referencing vector elements. */
15702 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
15704 enum machine_mode tmode
, mode0
;
15709 arg0
= TREE_VALUE (arglist
);
15710 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15712 op0
= expand_normal (arg0
);
15713 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
15715 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
15716 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
15717 gcc_assert (VECTOR_MODE_P (mode0
));
15719 op0
= force_reg (mode0
, op0
);
15721 if (optimize
|| !target
|| !register_operand (target
, tmode
))
15722 target
= gen_reg_rtx (tmode
);
15724 ix86_expand_vector_extract (true, target
, op0
, elt
);
15729 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15730 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15731 a language-level syntax for referencing vector elements. */
15734 ix86_expand_vec_set_builtin (tree arglist
)
15736 enum machine_mode tmode
, mode1
;
15737 tree arg0
, arg1
, arg2
;
15741 arg0
= TREE_VALUE (arglist
);
15742 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15743 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
15745 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
15746 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
15747 gcc_assert (VECTOR_MODE_P (tmode
));
15749 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
15750 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
15751 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
15753 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
15754 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
15756 op0
= force_reg (tmode
, op0
);
15757 op1
= force_reg (mode1
, op1
);
15759 ix86_expand_vector_set (true, op0
, op1
, elt
);
15764 /* Expand an expression EXP that calls a built-in function,
15765 with result going to TARGET if that's convenient
15766 (and in mode MODE if that's convenient).
15767 SUBTARGET may be used as the target for computing one of EXP's operands.
15768 IGNORE is nonzero if the value is to be ignored. */
15771 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
15772 enum machine_mode mode ATTRIBUTE_UNUSED
,
15773 int ignore ATTRIBUTE_UNUSED
)
15775 const struct builtin_description
*d
;
15777 enum insn_code icode
;
15778 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
15779 tree arglist
= TREE_OPERAND (exp
, 1);
15780 tree arg0
, arg1
, arg2
;
15781 rtx op0
, op1
, op2
, pat
;
15782 enum machine_mode tmode
, mode0
, mode1
, mode2
;
15783 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15787 case IX86_BUILTIN_EMMS
:
15788 emit_insn (gen_mmx_emms ());
15791 case IX86_BUILTIN_SFENCE
:
15792 emit_insn (gen_sse_sfence ());
15795 case IX86_BUILTIN_MASKMOVQ
:
15796 case IX86_BUILTIN_MASKMOVDQU
:
15797 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
15798 ? CODE_FOR_mmx_maskmovq
15799 : CODE_FOR_sse2_maskmovdqu
);
15800 /* Note the arg order is different from the operand order. */
15801 arg1
= TREE_VALUE (arglist
);
15802 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
15803 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
15804 op0
= expand_normal (arg0
);
15805 op1
= expand_normal (arg1
);
15806 op2
= expand_normal (arg2
);
15807 mode0
= insn_data
[icode
].operand
[0].mode
;
15808 mode1
= insn_data
[icode
].operand
[1].mode
;
15809 mode2
= insn_data
[icode
].operand
[2].mode
;
15811 op0
= force_reg (Pmode
, op0
);
15812 op0
= gen_rtx_MEM (mode1
, op0
);
15814 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
15815 op0
= copy_to_mode_reg (mode0
, op0
);
15816 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
15817 op1
= copy_to_mode_reg (mode1
, op1
);
15818 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
15819 op2
= copy_to_mode_reg (mode2
, op2
);
15820 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
15826 case IX86_BUILTIN_SQRTSS
:
15827 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
15828 case IX86_BUILTIN_RSQRTSS
:
15829 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
15830 case IX86_BUILTIN_RCPSS
:
15831 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
15833 case IX86_BUILTIN_LOADUPS
:
15834 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
15836 case IX86_BUILTIN_STOREUPS
:
15837 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
15839 case IX86_BUILTIN_LOADHPS
:
15840 case IX86_BUILTIN_LOADLPS
:
15841 case IX86_BUILTIN_LOADHPD
:
15842 case IX86_BUILTIN_LOADLPD
:
15843 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
15844 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
15845 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
15846 : CODE_FOR_sse2_loadlpd
);
15847 arg0
= TREE_VALUE (arglist
);
15848 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15849 op0
= expand_normal (arg0
);
15850 op1
= expand_normal (arg1
);
15851 tmode
= insn_data
[icode
].operand
[0].mode
;
15852 mode0
= insn_data
[icode
].operand
[1].mode
;
15853 mode1
= insn_data
[icode
].operand
[2].mode
;
15855 op0
= force_reg (mode0
, op0
);
15856 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
15857 if (optimize
|| target
== 0
15858 || GET_MODE (target
) != tmode
15859 || !register_operand (target
, tmode
))
15860 target
= gen_reg_rtx (tmode
);
15861 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
15867 case IX86_BUILTIN_STOREHPS
:
15868 case IX86_BUILTIN_STORELPS
:
15869 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
15870 : CODE_FOR_sse_storelps
);
15871 arg0
= TREE_VALUE (arglist
);
15872 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15873 op0
= expand_normal (arg0
);
15874 op1
= expand_normal (arg1
);
15875 mode0
= insn_data
[icode
].operand
[0].mode
;
15876 mode1
= insn_data
[icode
].operand
[1].mode
;
15878 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15879 op1
= force_reg (mode1
, op1
);
15881 pat
= GEN_FCN (icode
) (op0
, op1
);
15887 case IX86_BUILTIN_MOVNTPS
:
15888 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
15889 case IX86_BUILTIN_MOVNTQ
:
15890 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
15892 case IX86_BUILTIN_LDMXCSR
:
15893 op0
= expand_normal (TREE_VALUE (arglist
));
15894 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
15895 emit_move_insn (target
, op0
);
15896 emit_insn (gen_sse_ldmxcsr (target
));
15899 case IX86_BUILTIN_STMXCSR
:
15900 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
15901 emit_insn (gen_sse_stmxcsr (target
));
15902 return copy_to_mode_reg (SImode
, target
);
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
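
    /* For shufps the 8-bit mask selects two elements from the first source
       into the low two result slots and two from the second source into the
       high two; for shufpd one bit per operand picks the low or high double.
       Only immediate masks are accepted, hence the error above.  */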
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	/* @@@ better error message */
	error ("mask must be an immediate");
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	error ("shift must be an immediate");

      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
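
    /* pslldq/psrldq shift the whole 128-bit value, so the expansion above
       operates on the operand as a TImode quantity, viewing the V2DImode
       result register through a subreg, and the shift count has to be an
       immediate.  */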
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
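
    /* The SSE2/SSE3 cases below cover fences and cache control, which expand
       to bare insns with no result, and the movnt* builtins, which emit
       non-temporal stores intended to bypass the cache hierarchy.  */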
    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());

    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op0 = copy_to_mode_reg (SImode, op0);
      op1 = copy_to_mode_reg (SImode, op1);
      op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_sse3_monitor (op0, op1, op2));

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op0 = copy_to_mode_reg (SImode, op0);
      op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
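
    /* monitor/mwait take their arguments in fixed registers; the operands
       are copied into SImode pseudos here and the insn patterns are expected
       to constrain them to the registers the hardware uses (%eax/%ecx/%edx
       for monitor, %eax/%ecx for mwait).  */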
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);
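
  /* The vec_init/vec_ext/vec_set builtins defer to the generic vector
     expanders defined later in this file.  Builtins without an explicit case
     above are looked up in the descriptor tables scanned below.  */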
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
16197 /* Expand an expression EXP that calls a built-in library function,
16198 with result going to TARGET if that's convenient
16199 (and in mode MODE if that's convenient).
16200 SUBTARGET may be used as the target for computing one of EXP's operands.
16201 IGNORE is nonzero if the value is to be ignored. */
16204 ix86_expand_library_builtin (tree exp
, rtx target
,
16205 rtx subtarget ATTRIBUTE_UNUSED
,
16206 enum machine_mode mode ATTRIBUTE_UNUSED
,
16209 enum built_in_function fncode
;
16210 tree fndecl
, newfn
, call
;
16212 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16213 if (!TARGET_SSELIBM
)
16216 fncode
= builtin_mathfn_code (exp
);
16217 if (!ix86_builtin_function_variants
[(int)fncode
])
16220 fndecl
= get_callee_fndecl (exp
);
16221 if (DECL_RTL_SET_P (fndecl
))
16224 /* Build the redirected call and expand it. */
16225 newfn
= ix86_builtin_function_variants
[(int)fncode
];
16226 call
= build_function_call_expr (newfn
, TREE_OPERAND (exp
, 1));
16227 return expand_call (call
, target
, ignore
);
16230 /* Store OPERAND to the memory after reload is completed. This means
16231 that we can't easily use assign_stack_local. */
16233 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
16237 gcc_assert (reload_completed
);
16238 if (TARGET_RED_ZONE
)
16240 result
= gen_rtx_MEM (mode
,
16241 gen_rtx_PLUS (Pmode
,
16243 GEN_INT (-RED_ZONE_SIZE
)));
16244 emit_move_insn (result
, operand
);
16246 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
16252 operand
= gen_lowpart (DImode
, operand
);
16256 gen_rtx_SET (VOIDmode
,
16257 gen_rtx_MEM (DImode
,
16258 gen_rtx_PRE_DEC (DImode
,
16259 stack_pointer_rtx
)),
16263 gcc_unreachable ();
16265 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16274 split_di (&operand
, 1, operands
, operands
+ 1);
16276 gen_rtx_SET (VOIDmode
,
16277 gen_rtx_MEM (SImode
,
16278 gen_rtx_PRE_DEC (Pmode
,
16279 stack_pointer_rtx
)),
16282 gen_rtx_SET (VOIDmode
,
16283 gen_rtx_MEM (SImode
,
16284 gen_rtx_PRE_DEC (Pmode
,
16285 stack_pointer_rtx
)),
16290 /* Store HImodes as SImodes. */
16291 operand
= gen_lowpart (SImode
, operand
);
16295 gen_rtx_SET (VOIDmode
,
16296 gen_rtx_MEM (GET_MODE (operand
),
16297 gen_rtx_PRE_DEC (SImode
,
16298 stack_pointer_rtx
)),
16302 gcc_unreachable ();
16304 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16309 /* Free operand from the memory. */
16311 ix86_free_from_memory (enum machine_mode mode
)
16313 if (!TARGET_RED_ZONE
)
16317 if (mode
== DImode
|| TARGET_64BIT
)
16321 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16322 to pop or add instruction if registers are available. */
16323 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16324 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16329 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16330 QImode must go into class Q_REGS.
16331 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16332 movdf to do mem-to-mem moves through integer regs. */
16334 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
16336 /* We're only allowed to return a subclass of CLASS. Many of the
16337 following checks fail for NO_REGS, so eliminate that early. */
16338 if (class == NO_REGS
)
16341 /* All classes can load zeros. */
16342 if (x
== CONST0_RTX (GET_MODE (x
)))
16345 /* Floating-point constants need more complex checks. */
16346 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
16348 /* General regs can load everything. */
16349 if (reg_class_subset_p (class, GENERAL_REGS
))
16352 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16353 zero above. We only want to wind up preferring 80387 registers if
16354 we plan on doing computation with them. */
16356 && (TARGET_MIX_SSE_I387
16357 || !(TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (GET_MODE (x
))))
16358 && standard_80387_constant_p (x
))
16360 /* Limit class to non-sse. */
16361 if (class == FLOAT_SSE_REGS
)
16363 if (class == FP_TOP_SSE_REGS
)
16365 if (class == FP_SECOND_SSE_REGS
)
16366 return FP_SECOND_REG
;
16367 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
16373 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
16375 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x
))
16378 /* Generally when we see PLUS here, it's the function invariant
16379 (plus soft-fp const_int). Which can only be computed into general
16381 if (GET_CODE (x
) == PLUS
)
16382 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
16384 /* QImode constants are easy to load, but non-constant QImode data
16385 must go into Q_REGS. */
16386 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
16388 if (reg_class_subset_p (class, Q_REGS
))
16390 if (reg_class_subset_p (Q_REGS
, class))
16398 /* If we are copying between general and FP registers, we need a memory
16399 location. The same is true for SSE and MMX registers.
16401 The macro can't work reliably when one of the CLASSES is class containing
16402 registers from multiple units (SSE, MMX, integer). We avoid this by never
16403 combining those units in single alternative in the machine description.
16404 Ensure that this constraint holds to avoid unexpected surprises.
16406 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16407 enforce these sanity checks. */
16410 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
16411 enum machine_mode mode
, int strict
)
16413 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
16414 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
16415 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
16416 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
16417 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
16418 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
16420 gcc_assert (!strict
);
16424 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
16427 /* ??? This is a lie. We do have moves between mmx/general, and for
16428 mmx/sse2. But by saying we need secondary memory we discourage the
16429 register allocator from using the mmx registers unless needed. */
16430 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
16433 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16435 /* SSE1 doesn't have any direct moves from other classes. */
16439 /* If the target says that inter-unit moves are more expensive
16440 than moving through memory, then don't generate them. */
16441 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
16444 /* Between SSE and general, we have moves no larger than word size. */
16445 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
16448 /* ??? For the cost of one register reformat penalty, we could use
16449 the same instructions to move SFmode and DFmode data, but the
16450 relevant move patterns don't support those alternatives. */
16451 if (mode
== SFmode
|| mode
== DFmode
)
16458 /* Return true if the registers in CLASS cannot represent the change from
16459 modes FROM to TO. */
16462 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
16463 enum reg_class
class)
16468 /* x87 registers can't do subreg at all, as all values are reformatted
16469 to extended precision. */
16470 if (MAYBE_FLOAT_CLASS_P (class))
16473 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16475 /* Vector registers do not support QI or HImode loads. If we don't
16476 disallow a change to these modes, reload will assume it's ok to
16477 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16478 the vec_dupv4hi pattern. */
16479 if (GET_MODE_SIZE (from
) < 4)
16482 /* Vector registers do not support subreg with nonzero offsets, which
16483 are otherwise valid for integer registers. Since we can't see
16484 whether we have a nonzero offset from here, prohibit all
16485 nonparadoxical subregs changing size. */
16486 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
16493 /* Return the cost of moving data from a register in class CLASS1 to
16494 one in class CLASS2.
16496 It is not required that the cost always equal 2 when FROM is the same as TO;
16497 on some machines it is expensive to move between registers if they are not
16498 general registers. */
16501 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
16502 enum reg_class class2
)
16504 /* In case we require secondary memory, compute cost of the store followed
16505 by load. In order to avoid bad register allocation choices, we need
16506 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16508 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
16512 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
16513 MEMORY_MOVE_COST (mode
, class1
, 1));
16514 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
16515 MEMORY_MOVE_COST (mode
, class2
, 1));
16517 /* In case of copying from general_purpose_register we may emit multiple
16518 stores followed by single load causing memory size mismatch stall.
16519 Count this as arbitrarily high cost of 20. */
16520 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
16523 /* In the case of FP/MMX moves, the registers actually overlap, and we
16524 have to switch modes in order to treat them differently. */
16525 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
16526 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
16532 /* Moves between SSE/MMX and integer unit are expensive. */
16533 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
16534 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16535 return ix86_cost
->mmxsse_to_integer
;
16536 if (MAYBE_FLOAT_CLASS_P (class1
))
16537 return ix86_cost
->fp_move
;
16538 if (MAYBE_SSE_CLASS_P (class1
))
16539 return ix86_cost
->sse_move
;
16540 if (MAYBE_MMX_CLASS_P (class1
))
16541 return ix86_cost
->mmx_move
;
16545 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16548 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
16550 /* Flags and only flags can only hold CCmode values. */
16551 if (CC_REGNO_P (regno
))
16552 return GET_MODE_CLASS (mode
) == MODE_CC
;
16553 if (GET_MODE_CLASS (mode
) == MODE_CC
16554 || GET_MODE_CLASS (mode
) == MODE_RANDOM
16555 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
16557 if (FP_REGNO_P (regno
))
16558 return VALID_FP_MODE_P (mode
);
16559 if (SSE_REGNO_P (regno
))
16561 /* We implement the move patterns for all vector modes into and
16562 out of SSE registers, even when no operation instructions
16564 return (VALID_SSE_REG_MODE (mode
)
16565 || VALID_SSE2_REG_MODE (mode
)
16566 || VALID_MMX_REG_MODE (mode
)
16567 || VALID_MMX_REG_MODE_3DNOW (mode
));
16569 if (MMX_REGNO_P (regno
))
16571 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16572 so if the register is available at all, then we can move data of
16573 the given mode into or out of it. */
16574 return (VALID_MMX_REG_MODE (mode
)
16575 || VALID_MMX_REG_MODE_3DNOW (mode
));
16578 if (mode
== QImode
)
16580 /* Take care for QImode values - they can be in non-QI regs,
16581 but then they do cause partial register stalls. */
16582 if (regno
< 4 || TARGET_64BIT
)
16584 if (!TARGET_PARTIAL_REG_STALL
)
16586 return reload_in_progress
|| reload_completed
;
16588 /* We handle both integer and floats in the general purpose registers. */
16589 else if (VALID_INT_MODE_P (mode
))
16591 else if (VALID_FP_MODE_P (mode
))
16593 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16594 on to use that value in smaller contexts, this can easily force a
16595 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16596 supporting DImode, allow it. */
16597 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
16603 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16604 tieable integer mode. */
16607 ix86_tieable_integer_mode_p (enum machine_mode mode
)
16616 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
16619 return TARGET_64BIT
;
16626 /* Return true if MODE1 is accessible in a register that can hold MODE2
16627 without copying. That is, all register classes that can hold MODE2
16628 can also hold MODE1. */
16631 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
16633 if (mode1
== mode2
)
16636 if (ix86_tieable_integer_mode_p (mode1
)
16637 && ix86_tieable_integer_mode_p (mode2
))
16640 /* MODE2 being XFmode implies fp stack or general regs, which means we
16641 can tie any smaller floating point modes to it. Note that we do not
16642 tie this with TFmode. */
16643 if (mode2
== XFmode
)
16644 return mode1
== SFmode
|| mode1
== DFmode
;
16646 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16647 that we can tie it with SFmode. */
16648 if (mode2
== DFmode
)
16649 return mode1
== SFmode
;
16651 /* If MODE2 is only appropriate for an SSE register, then tie with
16652 any other mode acceptable to SSE registers. */
16653 if (GET_MODE_SIZE (mode2
) >= 8
16654 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
16655 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
16657 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16658 with any other mode acceptable to MMX registers. */
16659 if (GET_MODE_SIZE (mode2
) == 8
16660 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
16661 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
16666 /* Return the cost of moving data of mode M between a
16667 register and memory. A value of 2 is the default; this cost is
16668 relative to those in `REGISTER_MOVE_COST'.
16670 If moving between registers and memory is more expensive than
16671 between two registers, you should define this macro to express the
16674 Model also increased moving costs of QImode registers in non
16678 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
16680 if (FLOAT_CLASS_P (class))
16697 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
16699 if (SSE_CLASS_P (class))
16702 switch (GET_MODE_SIZE (mode
))
16716 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
16718 if (MMX_CLASS_P (class))
16721 switch (GET_MODE_SIZE (mode
))
16732 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
16734 switch (GET_MODE_SIZE (mode
))
16738 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
16739 : ix86_cost
->movzbl_load
);
16741 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
16742 : ix86_cost
->int_store
[0] + 4);
16745 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
16747 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16748 if (mode
== TFmode
)
16750 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
16751 * (((int) GET_MODE_SIZE (mode
)
16752 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
16756 /* Compute a (partial) cost for rtx X. Return true if the complete
16757 cost has been computed, and false if subexpressions should be
16758 scanned. In either case, *TOTAL contains the cost result. */
16761 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
16763 enum machine_mode mode
= GET_MODE (x
);
16771 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
16773 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
16775 else if (flag_pic
&& SYMBOLIC_CONST (x
)
16777 || (!GET_CODE (x
) != LABEL_REF
16778 && (GET_CODE (x
) != SYMBOL_REF
16779 || !SYMBOL_REF_LOCAL_P (x
)))))
16786 if (mode
== VOIDmode
)
16789 switch (standard_80387_constant_p (x
))
16794 default: /* Other constants */
16799 /* Start with (MEM (SYMBOL_REF)), since that's where
16800 it'll probably end up. Add a penalty for size. */
16801 *total
= (COSTS_N_INSNS (1)
16802 + (flag_pic
!= 0 && !TARGET_64BIT
)
16803 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
16809 /* The zero extensions is often completely free on x86_64, so make
16810 it as cheap as possible. */
16811 if (TARGET_64BIT
&& mode
== DImode
16812 && GET_MODE (XEXP (x
, 0)) == SImode
)
16814 else if (TARGET_ZERO_EXTEND_WITH_AND
)
16815 *total
= ix86_cost
->add
;
16817 *total
= ix86_cost
->movzx
;
16821 *total
= ix86_cost
->movsx
;
16825 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
16826 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
16828 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
16831 *total
= ix86_cost
->add
;
16834 if ((value
== 2 || value
== 3)
16835 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
16837 *total
= ix86_cost
->lea
;
16847 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
16849 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
16851 if (INTVAL (XEXP (x
, 1)) > 32)
16852 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
16854 *total
= ix86_cost
->shift_const
* 2;
16858 if (GET_CODE (XEXP (x
, 1)) == AND
)
16859 *total
= ix86_cost
->shift_var
* 2;
16861 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
16866 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
16867 *total
= ix86_cost
->shift_const
;
16869 *total
= ix86_cost
->shift_var
;
16874 if (FLOAT_MODE_P (mode
))
16876 *total
= ix86_cost
->fmul
;
16881 rtx op0
= XEXP (x
, 0);
16882 rtx op1
= XEXP (x
, 1);
16884 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
16886 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
16887 for (nbits
= 0; value
!= 0; value
&= value
- 1)
16891 /* This is arbitrary. */
16894 /* Compute costs correctly for widening multiplication. */
16895 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
16896 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
16897 == GET_MODE_SIZE (mode
))
16899 int is_mulwiden
= 0;
16900 enum machine_mode inner_mode
= GET_MODE (op0
);
16902 if (GET_CODE (op0
) == GET_CODE (op1
))
16903 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
16904 else if (GET_CODE (op1
) == CONST_INT
)
16906 if (GET_CODE (op0
) == SIGN_EXTEND
)
16907 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
16910 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
16914 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
16917 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
16918 + nbits
* ix86_cost
->mult_bit
16919 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
16928 if (FLOAT_MODE_P (mode
))
16929 *total
= ix86_cost
->fdiv
;
16931 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
16935 if (FLOAT_MODE_P (mode
))
16936 *total
= ix86_cost
->fadd
;
16937 else if (GET_MODE_CLASS (mode
) == MODE_INT
16938 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
16940 if (GET_CODE (XEXP (x
, 0)) == PLUS
16941 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
16942 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
16943 && CONSTANT_P (XEXP (x
, 1)))
16945 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
16946 if (val
== 2 || val
== 4 || val
== 8)
16948 *total
= ix86_cost
->lea
;
16949 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
16950 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
16952 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
16956 else if (GET_CODE (XEXP (x
, 0)) == MULT
16957 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
16959 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
16960 if (val
== 2 || val
== 4 || val
== 8)
16962 *total
= ix86_cost
->lea
;
16963 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
16964 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
16968 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
16970 *total
= ix86_cost
->lea
;
16971 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
16972 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
16973 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
16980 if (FLOAT_MODE_P (mode
))
16982 *total
= ix86_cost
->fadd
;
16990 if (!TARGET_64BIT
&& mode
== DImode
)
16992 *total
= (ix86_cost
->add
* 2
16993 + (rtx_cost (XEXP (x
, 0), outer_code
)
16994 << (GET_MODE (XEXP (x
, 0)) != DImode
))
16995 + (rtx_cost (XEXP (x
, 1), outer_code
)
16996 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
17002 if (FLOAT_MODE_P (mode
))
17004 *total
= ix86_cost
->fchs
;
17010 if (!TARGET_64BIT
&& mode
== DImode
)
17011 *total
= ix86_cost
->add
* 2;
17013 *total
= ix86_cost
->add
;
17017 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
17018 && XEXP (XEXP (x
, 0), 1) == const1_rtx
17019 && GET_CODE (XEXP (XEXP (x
, 0), 2)) == CONST_INT
17020 && XEXP (x
, 1) == const0_rtx
)
17022 /* This kind of construct is implemented using test[bwl].
17023 Treat it as if we had an AND. */
17024 *total
= (ix86_cost
->add
17025 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
17026 + rtx_cost (const1_rtx
, outer_code
));
17032 if (!TARGET_SSE_MATH
17034 || (mode
== DFmode
&& !TARGET_SSE2
))
17039 if (FLOAT_MODE_P (mode
))
17040 *total
= ix86_cost
->fabs
;
17044 if (FLOAT_MODE_P (mode
))
17045 *total
= ix86_cost
->fsqrt
;
17049 if (XINT (x
, 1) == UNSPEC_TP
)
17060 static int current_machopic_label_num
;
17062 /* Given a symbol name and its associated stub, write out the
17063 definition of the stub. */
17066 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
17068 unsigned int length
;
17069 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
17070 int label
= ++current_machopic_label_num
;
17072 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17073 symb
= (*targetm
.strip_name_encoding
) (symb
);
17075 length
= strlen (stub
);
17076 binder_name
= alloca (length
+ 32);
17077 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
17079 length
= strlen (symb
);
17080 symbol_name
= alloca (length
+ 32);
17081 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
17083 sprintf (lazy_ptr_name
, "L%d$lz", label
);
17086 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
17088 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
17090 fprintf (file
, "%s:\n", stub
);
17091 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17095 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
17096 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
17097 fprintf (file
, "\tjmp %%edx\n");
17100 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
17102 fprintf (file
, "%s:\n", binder_name
);
17106 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
17107 fprintf (file
, "\tpushl %%eax\n");
17110 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
17112 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
17114 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
17115 fprintf (file
, "%s:\n", lazy_ptr_name
);
17116 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17117 fprintf (file
, "\t.long %s\n", binder_name
);
17121 darwin_x86_file_end (void)
17123 darwin_file_end ();
17126 #endif /* TARGET_MACHO */
17128 /* Order the registers for register allocator. */
17131 x86_order_regs_for_local_alloc (void)
17136 /* First allocate the local general purpose registers. */
17137 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17138 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
17139 reg_alloc_order
[pos
++] = i
;
17141 /* Global general purpose registers. */
17142 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17143 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
17144 reg_alloc_order
[pos
++] = i
;
17146 /* x87 registers come first in case we are doing FP math
17148 if (!TARGET_SSE_MATH
)
17149 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17150 reg_alloc_order
[pos
++] = i
;
17152 /* SSE registers. */
17153 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
17154 reg_alloc_order
[pos
++] = i
;
17155 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
17156 reg_alloc_order
[pos
++] = i
;
17158 /* x87 registers. */
17159 if (TARGET_SSE_MATH
)
17160 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17161 reg_alloc_order
[pos
++] = i
;
17163 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
17164 reg_alloc_order
[pos
++] = i
;
17166 /* Initialize the rest of array as we do not allocate some registers
17168 while (pos
< FIRST_PSEUDO_REGISTER
)
17169 reg_alloc_order
[pos
++] = 0;
17172 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17173 struct attribute_spec.handler. */
17175 ix86_handle_struct_attribute (tree
*node
, tree name
,
17176 tree args ATTRIBUTE_UNUSED
,
17177 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
17180 if (DECL_P (*node
))
17182 if (TREE_CODE (*node
) == TYPE_DECL
)
17183 type
= &TREE_TYPE (*node
);
17188 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
17189 || TREE_CODE (*type
) == UNION_TYPE
)))
17191 warning (OPT_Wattributes
, "%qs attribute ignored",
17192 IDENTIFIER_POINTER (name
));
17193 *no_add_attrs
= true;
17196 else if ((is_attribute_p ("ms_struct", name
)
17197 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
17198 || ((is_attribute_p ("gcc_struct", name
)
17199 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
17201 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
17202 IDENTIFIER_POINTER (name
));
17203 *no_add_attrs
= true;
17210 ix86_ms_bitfield_layout_p (tree record_type
)
17212 return (TARGET_MS_BITFIELD_LAYOUT
&&
17213 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
17214 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
17217 /* Returns an expression indicating where the this parameter is
17218 located on entry to the FUNCTION. */
17221 x86_this_parameter (tree function
)
17223 tree type
= TREE_TYPE (function
);
17227 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
17228 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
17231 if (ix86_function_regparm (type
, function
) > 0)
17235 parm
= TYPE_ARG_TYPES (type
);
17236 /* Figure out whether or not the function has a variable number of
17238 for (; parm
; parm
= TREE_CHAIN (parm
))
17239 if (TREE_VALUE (parm
) == void_type_node
)
17241 /* If not, the this parameter is in the first argument. */
17245 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
17247 return gen_rtx_REG (SImode
, regno
);
17251 if (aggregate_value_p (TREE_TYPE (type
), type
))
17252 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
17254 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
17257 /* Determine whether x86_output_mi_thunk can succeed. */
17260 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
17261 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
17262 HOST_WIDE_INT vcall_offset
, tree function
)
17264 /* 64-bit can handle anything. */
17268 /* For 32-bit, everything's fine if we have one free register. */
17269 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
17272 /* Need a free register for vcall_offset. */
17276 /* Need a free register for GOT references. */
17277 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
17280 /* Otherwise ok. */
17284 /* Output the assembler code for a thunk function. THUNK_DECL is the
17285 declaration for the thunk function itself, FUNCTION is the decl for
17286 the target function. DELTA is an immediate constant offset to be
17287 added to THIS. If VCALL_OFFSET is nonzero, the word at
17288 *(*this + vcall_offset) should be added to THIS. */
17291 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
17292 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
17293 HOST_WIDE_INT vcall_offset
, tree function
)
17296 rtx
this = x86_this_parameter (function
);
17299 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17300 pull it in now and let DELTA benefit. */
17303 else if (vcall_offset
)
17305 /* Put the this parameter into %eax. */
17307 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
17308 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17311 this_reg
= NULL_RTX
;
17313 /* Adjust the this parameter by a fixed constant. */
17316 xops
[0] = GEN_INT (delta
);
17317 xops
[1] = this_reg
? this_reg
: this;
17320 if (!x86_64_general_operand (xops
[0], DImode
))
17322 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17324 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
17328 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17331 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17334 /* Adjust the this parameter by a value stored in the vtable. */
17338 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17341 int tmp_regno
= 2 /* ECX */;
17342 if (lookup_attribute ("fastcall",
17343 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
17344 tmp_regno
= 0 /* EAX */;
17345 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
17348 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
17351 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17353 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17355 /* Adjust the this parameter. */
17356 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
17357 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
17359 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
17360 xops
[0] = GEN_INT (vcall_offset
);
17362 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17363 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
17365 xops
[1] = this_reg
;
17367 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17369 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17372 /* If necessary, drop THIS back to its stack slot. */
17373 if (this_reg
&& this_reg
!= this)
17375 xops
[0] = this_reg
;
17377 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17380 xops
[0] = XEXP (DECL_RTL (function
), 0);
17383 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17384 output_asm_insn ("jmp\t%P0", xops
);
17387 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
17388 tmp
= gen_rtx_CONST (Pmode
, tmp
);
17389 tmp
= gen_rtx_MEM (QImode
, tmp
);
17391 output_asm_insn ("jmp\t%A0", xops
);
17396 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17397 output_asm_insn ("jmp\t%P0", xops
);
17402 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
17403 tmp
= (gen_rtx_SYMBOL_REF
17405 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
17406 tmp
= gen_rtx_MEM (QImode
, tmp
);
17408 output_asm_insn ("jmp\t%0", xops
);
17411 #endif /* TARGET_MACHO */
17413 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
17414 output_set_got (tmp
, NULL_RTX
);
17417 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
17418 output_asm_insn ("jmp\t{*}%1", xops
);
17424 x86_file_start (void)
17426 default_file_start ();
17427 if (X86_FILE_START_VERSION_DIRECTIVE
)
17428 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
17429 if (X86_FILE_START_FLTUSED
)
17430 fputs ("\t.global\t__fltused\n", asm_out_file
);
17431 if (ix86_asm_dialect
== ASM_INTEL
)
17432 fputs ("\t.intel_syntax\n", asm_out_file
);
17436 x86_field_alignment (tree field
, int computed
)
17438 enum machine_mode mode
;
17439 tree type
= TREE_TYPE (field
);
17441 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
17443 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
17444 ? get_inner_array_type (type
) : type
);
17445 if (mode
== DFmode
|| mode
== DCmode
17446 || GET_MODE_CLASS (mode
) == MODE_INT
17447 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
17448 return MIN (32, computed
);
17452 /* Output assembler code to FILE to increment profiler label # LABELNO
17453 for profiling a function entry. */
17455 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
17460 #ifndef NO_PROFILE_COUNTERS
17461 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
17463 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
17467 #ifndef NO_PROFILE_COUNTERS
17468 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
17470 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17474 #ifndef NO_PROFILE_COUNTERS
17475 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17476 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
17478 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
17482 #ifndef NO_PROFILE_COUNTERS
17483 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
17484 PROFILE_COUNT_REGISTER
);
17486 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17490 /* We don't have exact information about the insn sizes, but we may assume
17491 quite safely that we are informed about all 1 byte insns and memory
17492 address sizes. This is enough to eliminate unnecessary padding in
17496 min_insn_size (rtx insn
)
17500 if (!INSN_P (insn
) || !active_insn_p (insn
))
  /* Discard alignments we've emitted, and jump instructions.  */
17504 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
17505 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
17507 if (GET_CODE (insn
) == JUMP_INSN
17508 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
17509 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
17514 if (GET_CODE (insn
) == CALL_INSN
17515 && symbolic_reference_mentioned_p (PATTERN (insn
))
17516 && !SIBLING_CALL_P (insn
))
17518 if (get_attr_length (insn
) <= 1)
17521 /* For normal instructions we may rely on the sizes of addresses
17522 and the presence of symbol to require 4 bytes of encoding.
17523 This is not the case for jumps where references are PC relative. */
17524 if (GET_CODE (insn
) != JUMP_INSN
)
17526 l
= get_attr_length_address (insn
);
17527 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
17536 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17540 ix86_avoid_jump_misspredicts (void)
17542 rtx insn
, start
= get_insns ();
17543 int nbytes
= 0, njumps
= 0;
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16-byte page.

     The smallest offset in the page INSN can start at is the case where
     START ends on offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
     We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).  */
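  /* For example, if the interval below ends up with NBYTES == 12 and INSN
     itself is 3 bytes, the code pads by 15 - 12 + 3 = 6 bytes, which is
     enough to push INSN out of the 16-byte window shared with the three
     earlier jumps.  */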
17556 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
17559 nbytes
+= min_insn_size (insn
);
17561 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
17562 INSN_UID (insn
), min_insn_size (insn
));
17563 if ((GET_CODE (insn
) == JUMP_INSN
17564 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
17565 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
17566 || GET_CODE (insn
) == CALL_INSN
)
17573 start
= NEXT_INSN (start
);
17574 if ((GET_CODE (start
) == JUMP_INSN
17575 && GET_CODE (PATTERN (start
)) != ADDR_VEC
17576 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
17577 || GET_CODE (start
) == CALL_INSN
)
17578 njumps
--, isjump
= 1;
17581 nbytes
-= min_insn_size (start
);
17583 gcc_assert (njumps
>= 0);
17585 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
17586 INSN_UID (start
), INSN_UID (insn
), nbytes
);
17588 if (njumps
== 3 && isjump
&& nbytes
< 16)
17590 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
17593 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
17594 INSN_UID (insn
), padsize
);
17595 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
17605 ix86_pad_returns (void)
17610 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
17612 basic_block bb
= e
->src
;
17613 rtx ret
= BB_END (bb
);
17615 bool replace
= false;
17617 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
17618 || !maybe_hot_bb_p (bb
))
17620 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
17621 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
17623 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
17628 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17629 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
17630 && !(e
->flags
& EDGE_FALLTHRU
))
17635 prev
= prev_active_insn (ret
);
17637 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
17638 || GET_CODE (prev
) == CALL_INSN
))
17640 /* Empty functions get branch mispredict even when the jump destination
17641 is not visible to us. */
17642 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
17647 emit_insn_before (gen_return_internal_long (), ret
);
17653 /* Implement machine specific optimizations. We implement padding of returns
17654 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
17658 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
17659 ix86_pad_returns ();
17660 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
17661 ix86_avoid_jump_misspredicts ();
17664 /* Return nonzero when QImode register that must be represented via REX prefix
17667 x86_extended_QIreg_mentioned_p (rtx insn
)
17670 extract_insn_cached (insn
);
17671 for (i
= 0; i
< recog_data
.n_operands
; i
++)
17672 if (REG_P (recog_data
.operand
[i
])
17673 && REGNO (recog_data
.operand
[i
]) >= 4)
17678 /* Return nonzero when P points to register encoded via REX prefix.
17679 Called via for_each_rtx. */
17681 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
17683 unsigned int regno
;
17686 regno
= REGNO (*p
);
17687 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
17690 /* Return true when INSN mentions register that must be encoded using REX
17693 x86_extended_reg_mentioned_p (rtx insn
)
17695 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
17735 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17736 with all elements equal to VAR. Return true if successful. */
17739 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
17740 rtx target
, rtx val
)
17742 enum machine_mode smode
, wsmode
, wvmode
;
17749 if (!mmx_ok
&& !TARGET_SSE
)
17757 val
= force_reg (GET_MODE_INNER (mode
), val
);
17758 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
17759 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
17765 if (TARGET_SSE
|| TARGET_3DNOW_A
)
17767 val
= gen_lowpart (SImode
, val
);
17768 x
= gen_rtx_TRUNCATE (HImode
, val
);
17769 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
17770 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
17799 /* Replicate the value once into the next wider mode and recurse. */
17800 val
= convert_modes (wsmode
, smode
, val
, true);
17801 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
17802 GEN_INT (GET_MODE_BITSIZE (smode
)),
17803 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17804 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
17806 x
= gen_reg_rtx (wvmode
);
17807 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
17808 gcc_unreachable ();
17809 emit_move_insn (target
, gen_lowpart (mode
, x
));
17817 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17818 whose low element is VAR, and other elements are zero. Return true
17822 ix86_expand_vector_init_low_nonzero (bool mmx_ok
, enum machine_mode mode
,
17823 rtx target
, rtx var
)
17825 enum machine_mode vsimode
;
17832 if (!mmx_ok
&& !TARGET_SSE
)
17838 var
= force_reg (GET_MODE_INNER (mode
), var
);
17839 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
17840 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
17845 var
= force_reg (GET_MODE_INNER (mode
), var
);
17846 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
17847 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
17848 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
17853 vsimode
= V4SImode
;
17859 vsimode
= V2SImode
;
17862 /* Zero extend the variable element to SImode and recurse. */
17863 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
17865 x
= gen_reg_rtx (vsimode
);
17866 if (!ix86_expand_vector_init_low_nonzero (mmx_ok
, vsimode
, x
, var
))
17867 gcc_unreachable ();
17869 emit_move_insn (target
, gen_lowpart (mode
, x
));
17877 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17878 consisting of the values in VALS. It is known that all elements
17879 except ONE_VAR are constants. Return true if successful. */
17882 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
17883 rtx target
, rtx vals
, int one_var
)
17885 rtx var
= XVECEXP (vals
, 0, one_var
);
17886 enum machine_mode wmode
;
17889 const_vec
= copy_rtx (vals
);
17890 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
17891 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
17899 /* For the two element vectors, it's just as easy to use
17900 the general case. */
17916 /* There's no way to set one QImode entry easily. Combine
17917 the variable value with its adjacent constant value, and
17918 promote to an HImode set. */
17919 x
= XVECEXP (vals
, 0, one_var
^ 1);
17922 var
= convert_modes (HImode
, QImode
, var
, true);
17923 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
17924 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17925 x
= GEN_INT (INTVAL (x
) & 0xff);
17929 var
= convert_modes (HImode
, QImode
, var
, true);
17930 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
17932 if (x
!= const0_rtx
)
17933 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
17934 1, OPTAB_LIB_WIDEN
);
17936 x
= gen_reg_rtx (wmode
);
17937 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
17938 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
17940 emit_move_insn (target
, gen_lowpart (mode
, x
));
17947 emit_move_insn (target
, const_vec
);
17948 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
17952 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17953 all values variable, and none identical. */
17956 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
17957 rtx target
, rtx vals
)
17959 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
17960 rtx op0
= NULL
, op1
= NULL
;
17961 bool use_vec_concat
= false;
17967 if (!mmx_ok
&& !TARGET_SSE
)
17973 /* For the two element vectors, we always implement VEC_CONCAT. */
17974 op0
= XVECEXP (vals
, 0, 0);
17975 op1
= XVECEXP (vals
, 0, 1);
17976 use_vec_concat
= true;
17980 half_mode
= V2SFmode
;
17983 half_mode
= V2SImode
;
17989 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
17990 Recurse to load the two halves. */
17992 op0
= gen_reg_rtx (half_mode
);
17993 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
17994 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
17996 op1
= gen_reg_rtx (half_mode
);
17997 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
17998 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
18000 use_vec_concat
= true;
18011 gcc_unreachable ();
18014 if (use_vec_concat
)
18016 if (!register_operand (op0
, half_mode
))
18017 op0
= force_reg (half_mode
, op0
);
18018 if (!register_operand (op1
, half_mode
))
18019 op1
= force_reg (half_mode
, op1
);
18021 emit_insn (gen_rtx_SET (VOIDmode
, target
,
18022 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
18026 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
18027 enum machine_mode inner_mode
;
18028 rtx words
[4], shift
;
18030 inner_mode
= GET_MODE_INNER (mode
);
18031 n_elts
= GET_MODE_NUNITS (mode
);
18032 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
18033 n_elt_per_word
= n_elts
/ n_words
;
18034 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
18036 for (i
= 0; i
< n_words
; ++i
)
18038 rtx word
= NULL_RTX
;
18040 for (j
= 0; j
< n_elt_per_word
; ++j
)
18042 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
18043 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
18049 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
18050 word
, 1, OPTAB_LIB_WIDEN
);
18051 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
18052 word
, 1, OPTAB_LIB_WIDEN
);
18060 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
18061 else if (n_words
== 2)
18063 rtx tmp
= gen_reg_rtx (mode
);
18064 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
18065 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
18066 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
18067 emit_move_insn (target
, tmp
);
18069 else if (n_words
== 4)
18071 rtx tmp
= gen_reg_rtx (V4SImode
);
18072 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
18073 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
18074 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
18077 gcc_unreachable ();
18081 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18082 instructions unless MMX_OK is true. */
18085 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
18087 enum machine_mode mode
= GET_MODE (target
);
18088 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18089 int n_elts
= GET_MODE_NUNITS (mode
);
18090 int n_var
= 0, one_var
= -1;
18091 bool all_same
= true, all_const_zero
= true;
18095 for (i
= 0; i
< n_elts
; ++i
)
18097 x
= XVECEXP (vals
, 0, i
);
18098 if (!CONSTANT_P (x
))
18099 n_var
++, one_var
= i
;
18100 else if (x
!= CONST0_RTX (inner_mode
))
18101 all_const_zero
= false;
18102 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
18106 /* Constants are best loaded from the constant pool. */
18109 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
18113 /* If all values are identical, broadcast the value. */
18115 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
18116 XVECEXP (vals
, 0, 0)))
18119 /* Values where only one field is non-constant are best loaded from
18120 the pool and overwritten via move later. */
18123 if (all_const_zero
&& one_var
== 0
18124 && ix86_expand_vector_init_low_nonzero (mmx_ok
, mode
, target
,
18125 XVECEXP (vals
, 0, 0)))
18128 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
18132 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;

    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }
  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
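      /* No direct vec_merge pattern is available: spill the vector to a
         stack temporary, overwrite the selected element in memory, and
         reload the whole vector.  */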
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

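/* Extract element ELT of vector VEC into the scalar register TARGET.
   Avoid MMX instructions unless MMX_OK is true.  */
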
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;

    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;
    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
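      /* No vec_select pattern is available: spill the vector to a stack
         temporary and load the selected element back from memory.  */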
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);
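  /* tmp1 <- high half of IN (movhlps); tmp2 <- FN (tmp1, IN) reduces four
     elements to two; broadcasting element 1 of tmp2 and applying FN once
     more leaves the complete reduction in element 0 of DEST.  */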
  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}

/* Implements target hook vector_mode_supported_p.  */

static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
        return true;
    }

  return false;
}

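/* Mark static and external variables that live in the large data/bss
   area with SYMBOL_FLAG_FAR_ADDR, after the default section info
   encoding has been applied.  */
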
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
          && TARGET_USE_FFREEP)
        return "ffreep\t%y0";
      return "fstp\t%y0";
    }

  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";

  return "fst\t%y0";
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
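      /* Without SAHF, test bit 2 of the upper status-word byte, which
         is the C2 condition flag.  */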
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}

/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
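  /* fyl2xp1 is specified only for |x| < 1 - sqrt(2)/2 ~ 0.2928932...;
     larger arguments branch to the fyl2x path on 1.0 + op1 instead.  */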
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

#include "gt-i386.h"